diff --git a/SOURCES/0044-tc-m_tunnel_key-reformat-the-usage-text.patch b/SOURCES/0044-tc-m_tunnel_key-reformat-the-usage-text.patch new file mode 100644 index 0000000..5530aaf --- /dev/null +++ b/SOURCES/0044-tc-m_tunnel_key-reformat-the-usage-text.patch @@ -0,0 +1,44 @@ +From 04ecd76fa66c7745529b3b007ad04a307d2b7518 Mon Sep 17 00:00:00 2001 +From: Phil Sutter +Date: Wed, 6 Feb 2019 14:31:10 +0100 +Subject: [PATCH] tc: m_tunnel_key: reformat the usage text + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1658506 +Upstream Status: iproute2.git commit 50907a8245ea3 + +commit 50907a8245ea37875fb877d6f21f51a1f247b167 +Author: Jiri Benc +Date: Wed Jun 14 21:29:49 2017 +0200 + + tc: m_tunnel_key: reformat the usage text + + Adding new tunnel key fields would cause the usage line overflow 80 chars. + Make the usage text similar to other commands. + + Signed-off-by: Jiri Benc +--- + tc/m_tunnel_key.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/tc/m_tunnel_key.c b/tc/m_tunnel_key.c +index 3ceec1c..5222c25 100644 +--- a/tc/m_tunnel_key.c ++++ b/tc/m_tunnel_key.c +@@ -22,7 +22,13 @@ + static void explain(void) + { + fprintf(stderr, "Usage: tunnel_key unset\n"); +- fprintf(stderr, " tunnel_key set id TUNNELID src_ip IP dst_ip IP dst_port UDP_PORT\n"); ++ fprintf(stderr, " tunnel_key set \n"); ++ fprintf(stderr, ++ "Where TUNNEL_KEY is a combination of:\n" ++ "id (mandatory)\n" ++ "src_ip (mandatory)\n" ++ "dst_ip (mandatory)\n" ++ "dst_port \n"); + } + + static void usage(void) +-- +1.8.3.1 + diff --git a/SOURCES/0045-tc-m_tunnel_key-Allow-key-less-tunnels.patch b/SOURCES/0045-tc-m_tunnel_key-Allow-key-less-tunnels.patch new file mode 100644 index 0000000..76c2a4f --- /dev/null +++ b/SOURCES/0045-tc-m_tunnel_key-Allow-key-less-tunnels.patch @@ -0,0 +1,88 @@ +From f43f500151a6261e24d89674b0a44f2d84c9e207 Mon Sep 17 00:00:00 2001 +From: Phil Sutter +Date: Wed, 6 Feb 2019 14:21:09 +0100 +Subject: [PATCH] tc: m_tunnel_key: Allow key-less 
tunnels + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1658506 +Upstream Status: iproute2.git commit dc0332b1e8e4a +Conflicts: Context change due to missing commit 59eb271d1d259 + ("tc: m_tunnel_key: add csum/nocsum option"). + +commit dc0332b1e8e4ab8771562128993d512986f856e2 +Author: Adi Nissim +Date: Thu Jan 10 15:03:50 2019 +0200 + + tc: m_tunnel_key: Allow key-less tunnels + + Change the id parameter of the tunnel_key set action from mandatory to + optional. + + Some tunneling protocols (e.g. GRE) specify the id as an optional field. + + Signed-off-by: Adi Nissim + Signed-off-by: Stephen Hemminger +--- + man/man8/tc-tunnel_key.8 | 4 ++-- + tc/m_tunnel_key.c | 6 ++---- + 2 files changed, 4 insertions(+), 6 deletions(-) + +diff --git a/man/man8/tc-tunnel_key.8 b/man/man8/tc-tunnel_key.8 +index 2e56973..52fa585 100644 +--- a/man/man8/tc-tunnel_key.8 ++++ b/man/man8/tc-tunnel_key.8 +@@ -56,12 +56,12 @@ above). + .TP + .B set + Set tunnel metadata to be used by the IP tunnel device. Requires +-.B id +-, + .B src_ip + and + .B dst_ip + options. ++.B id ++, + .B dst_port + is optional. 
+ .RS +diff --git a/tc/m_tunnel_key.c b/tc/m_tunnel_key.c +index 5222c25..acbcfc1 100644 +--- a/tc/m_tunnel_key.c ++++ b/tc/m_tunnel_key.c +@@ -25,7 +25,7 @@ static void explain(void) + fprintf(stderr, " tunnel_key set \n"); + fprintf(stderr, + "Where TUNNEL_KEY is a combination of:\n" +- "id (mandatory)\n" ++ "id \n" + "src_ip (mandatory)\n" + "dst_ip (mandatory)\n" + "dst_port \n"); +@@ -91,7 +91,6 @@ static int parse_tunnel_key(struct action_util *a, int *argc_p, char ***argv_p, + int ret; + int has_src_ip = 0; + int has_dst_ip = 0; +- int has_key_id = 0; + + if (matches(*argv, "tunnel_key") != 0) + return -1; +@@ -147,7 +146,6 @@ static int parse_tunnel_key(struct action_util *a, int *argc_p, char ***argv_p, + fprintf(stderr, "Illegal \"id\"\n"); + return -1; + } +- has_key_id = 1; + } else if (matches(*argv, "dst_port") == 0) { + NEXT_ARG(); + ret = tunnel_key_parse_dst_port(*argv, +@@ -180,7 +178,7 @@ static int parse_tunnel_key(struct action_util *a, int *argc_p, char ***argv_p, + } + + if (action == TCA_TUNNEL_KEY_ACT_SET && +- (!has_src_ip || !has_dst_ip || !has_key_id)) { ++ (!has_src_ip || !has_dst_ip)) { + fprintf(stderr, "set needs tunnel_key parameters\n"); + explain(); + return -1; +-- +1.8.3.1 + diff --git a/SOURCES/0046-tc-include-stdint.h-explicitly-for-UINT16_MAX.patch b/SOURCES/0046-tc-include-stdint.h-explicitly-for-UINT16_MAX.patch new file mode 100644 index 0000000..ed0d248 --- /dev/null +++ b/SOURCES/0046-tc-include-stdint.h-explicitly-for-UINT16_MAX.patch @@ -0,0 +1,39 @@ +From 7a77e4df94a48c35f9a4bf1fc3f8e9d1f72a77b7 Mon Sep 17 00:00:00 2001 +From: Phil Sutter +Date: Wed, 6 Feb 2019 14:50:24 +0100 +Subject: [PATCH] tc: include stdint.h explicitly for UINT16_MAX + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1641909 +Upstream Status: iproute2.git commit ae717baf15fb4 + +commit ae717baf15fb4d30749ada3948d9445892bac239 +Author: Khem Raj +Date: Sat May 20 14:28:46 2017 -0700 + + tc: include stdint.h explicitly for UINT16_MAX + + 
Fixes + | tc_core.c:190:29: error: 'UINT16_MAX' undeclared (first use in this function); did you mean '__INT16_MAX__'? + | if ((sz >> s->size_log) > UINT16_MAX) { + | ^~~~~~~~~~ + + Signed-off-by: Khem Raj +--- + tc/tc_core.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tc/tc_core.c b/tc/tc_core.c +index 7bbe0d7..821b741 100644 +--- a/tc/tc_core.c ++++ b/tc/tc_core.c +@@ -12,6 +12,7 @@ + + #include + #include ++#include + #include + #include + #include +-- +1.8.3.1 + diff --git a/SOURCES/0047-Update-kernel-headers.patch b/SOURCES/0047-Update-kernel-headers.patch new file mode 100644 index 0000000..916d932 --- /dev/null +++ b/SOURCES/0047-Update-kernel-headers.patch @@ -0,0 +1,5651 @@ +From 007c76937f34c11c4c827373f081a9c4eebf1fc3 Mon Sep 17 00:00:00 2001 +From: Phil Sutter +Date: Wed, 6 Feb 2019 14:50:24 +0100 +Subject: [PATCH] Update kernel headers + +This updates kernel headers to upstream commit +761ec9e29ff867452057f59dc6ca430688b409ea. Update was done via: + +| git checkout 761ec9e29ff867452057f59dc6ca430688b409ea -- include/uapi + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1641909 +Upstream Status: RHEL-only +--- + include/uapi/linux/atm.h | 1 + + include/uapi/linux/atmapi.h | 1 + + include/uapi/linux/atmarp.h | 1 + + include/uapi/linux/atmdev.h | 1 + + include/uapi/linux/atmioc.h | 1 + + include/uapi/linux/atmsap.h | 1 + + include/uapi/linux/bpf.h | 2481 ++++++++++++++++++++---- + include/uapi/linux/bpf_common.h | 8 +- + include/uapi/linux/btf.h | 113 ++ + include/uapi/linux/can.h | 1 + + include/uapi/linux/can/netlink.h | 2 + + include/uapi/linux/can/vxcan.h | 1 + + include/uapi/linux/devlink.h | 82 + + include/uapi/linux/elf-em.h | 1 + + include/uapi/linux/fib_rules.h | 12 +- + include/uapi/linux/filter.h | 1 + + include/uapi/linux/fou.h | 1 + + include/uapi/linux/gen_stats.h | 1 + + include/uapi/linux/genetlink.h | 1 + + include/uapi/linux/hdlc/ioctl.h | 1 + + include/uapi/linux/icmpv6.h | 1 + + include/uapi/linux/if.h | 1 + + 
include/uapi/linux/if_addr.h | 2 + + include/uapi/linux/if_addrlabel.h | 1 + + include/uapi/linux/if_alg.h | 1 + + include/uapi/linux/if_arp.h | 1 + + include/uapi/linux/if_bonding.h | 1 + + include/uapi/linux/if_bridge.h | 1 + + include/uapi/linux/if_ether.h | 11 + + include/uapi/linux/if_link.h | 59 + + include/uapi/linux/if_macsec.h | 10 +- + include/uapi/linux/if_packet.h | 1 + + include/uapi/linux/if_tun.h | 5 + + include/uapi/linux/if_tunnel.h | 5 + + include/uapi/linux/if_vlan.h | 1 + + include/uapi/linux/ife.h | 1 + + include/uapi/linux/ila.h | 23 + + include/uapi/linux/in.h | 1 + + include/uapi/linux/in6.h | 2 + + include/uapi/linux/in_route.h | 1 + + include/uapi/linux/inet_diag.h | 3 + + include/uapi/linux/ip.h | 1 + + include/uapi/linux/ip6_tunnel.h | 3 + + include/uapi/linux/ipsec.h | 1 + + include/uapi/linux/kernel.h | 1 + + include/uapi/linux/l2tp.h | 7 +- + include/uapi/linux/libc-compat.h | 56 +- + include/uapi/linux/limits.h | 1 + + include/uapi/linux/lwtunnel.h | 1 + + include/uapi/linux/magic.h | 2 + + include/uapi/linux/mpls.h | 1 + + include/uapi/linux/mpls_iptunnel.h | 1 + + include/uapi/linux/neighbour.h | 1 + + include/uapi/linux/net_namespace.h | 1 + + include/uapi/linux/netconf.h | 1 + + include/uapi/linux/netdevice.h | 1 + + include/uapi/linux/netfilter.h | 1 + + include/uapi/linux/netfilter/ipset/ip_set.h | 1 + + include/uapi/linux/netfilter/x_tables.h | 1 + + include/uapi/linux/netfilter/xt_set.h | 1 + + include/uapi/linux/netfilter/xt_tcpudp.h | 1 + + include/uapi/linux/netfilter_ipv4.h | 2 + + include/uapi/linux/netfilter_ipv4/ip_tables.h | 1 + + include/uapi/linux/netfilter_ipv6.h | 2 + + include/uapi/linux/netfilter_ipv6/ip6_tables.h | 1 + + include/uapi/linux/netlink.h | 1 + + include/uapi/linux/netlink_diag.h | 1 + + include/uapi/linux/packet_diag.h | 1 + + include/uapi/linux/param.h | 1 + + include/uapi/linux/pfkeyv2.h | 1 + + include/uapi/linux/pkt_cls.h | 15 +- + include/uapi/linux/pkt_sched.h | 198 ++ + 
include/uapi/linux/posix_types.h | 1 + + include/uapi/linux/rtnetlink.h | 24 + + include/uapi/linux/sctp.h | 71 +- + include/uapi/linux/seg6.h | 5 +- + include/uapi/linux/seg6_genl.h | 1 + + include/uapi/linux/seg6_hmac.h | 1 + + include/uapi/linux/seg6_iptunnel.h | 1 + + include/uapi/linux/seg6_local.h | 12 + + include/uapi/linux/sock_diag.h | 1 + + include/uapi/linux/socket.h | 1 + + include/uapi/linux/sockios.h | 1 + + include/uapi/linux/stddef.h | 1 + + include/uapi/linux/sysinfo.h | 1 + + include/uapi/linux/tc_act/tc_bpf.h | 1 + + include/uapi/linux/tc_act/tc_connmark.h | 1 + + include/uapi/linux/tc_act/tc_csum.h | 1 + + include/uapi/linux/tc_act/tc_defact.h | 1 + + include/uapi/linux/tc_act/tc_gact.h | 1 + + include/uapi/linux/tc_act/tc_ife.h | 1 + + include/uapi/linux/tc_act/tc_ipt.h | 1 + + include/uapi/linux/tc_act/tc_mirred.h | 7 +- + include/uapi/linux/tc_act/tc_nat.h | 1 + + include/uapi/linux/tc_act/tc_pedit.h | 10 +- + include/uapi/linux/tc_act/tc_sample.h | 1 + + include/uapi/linux/tc_act/tc_skbedit.h | 3 + + include/uapi/linux/tc_act/tc_skbmod.h | 1 + + include/uapi/linux/tc_act/tc_tunnel_key.h | 29 + + include/uapi/linux/tc_act/tc_vlan.h | 1 + + include/uapi/linux/tc_ematch/tc_em_cmp.h | 1 + + include/uapi/linux/tc_ematch/tc_em_ipt.h | 20 + + include/uapi/linux/tc_ematch/tc_em_meta.h | 1 + + include/uapi/linux/tc_ematch/tc_em_nbyte.h | 1 + + include/uapi/linux/tcp.h | 22 + + include/uapi/linux/tcp_metrics.h | 1 + + include/uapi/linux/tipc.h | 188 +- + include/uapi/linux/tipc_netlink.h | 37 + + include/uapi/linux/tipc_sockets_diag.h | 17 + + include/uapi/linux/types.h | 3 + + include/uapi/linux/unix_diag.h | 1 + + include/uapi/linux/veth.h | 1 + + include/uapi/linux/vm_sockets_diag.h | 1 + + include/uapi/linux/xfrm.h | 1 + + 114 files changed, 3201 insertions(+), 427 deletions(-) + create mode 100644 include/uapi/linux/btf.h + create mode 100644 include/uapi/linux/tc_ematch/tc_em_ipt.h + create mode 100644 include/uapi/linux/tipc_sockets_diag.h + 
+diff --git a/include/uapi/linux/atm.h b/include/uapi/linux/atm.h +index 08e27be..e33ff6b 100644 +--- a/include/uapi/linux/atm.h ++++ b/include/uapi/linux/atm.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* atm.h - general ATM declarations */ + + /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ +diff --git a/include/uapi/linux/atmapi.h b/include/uapi/linux/atmapi.h +index 8fe54d9..c9bf5c2 100644 +--- a/include/uapi/linux/atmapi.h ++++ b/include/uapi/linux/atmapi.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* atmapi.h - ATM API user space/kernel compatibility */ + + /* Written 1999,2000 by Werner Almesberger, EPFL ICA */ +diff --git a/include/uapi/linux/atmarp.h b/include/uapi/linux/atmarp.h +index 231f4bd..8e44d12 100644 +--- a/include/uapi/linux/atmarp.h ++++ b/include/uapi/linux/atmarp.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* atmarp.h - ATM ARP protocol and kernel-demon interface definitions */ + + /* Written 1995-1999 by Werner Almesberger, EPFL LRC/ICA */ +diff --git a/include/uapi/linux/atmdev.h b/include/uapi/linux/atmdev.h +index 8faa8b9..9bdb96a 100644 +--- a/include/uapi/linux/atmdev.h ++++ b/include/uapi/linux/atmdev.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* atmdev.h - ATM device driver declarations and various related items */ + + /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ +diff --git a/include/uapi/linux/atmioc.h b/include/uapi/linux/atmioc.h +index 37f67aa..cd7655e 100644 +--- a/include/uapi/linux/atmioc.h ++++ b/include/uapi/linux/atmioc.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* atmioc.h - ranges for ATM-related ioctl numbers */ + + /* Written 1995-1999 by Werner Almesberger, EPFL LRC/ICA */ +diff --git a/include/uapi/linux/atmsap.h b/include/uapi/linux/atmsap.h +index 799b104..fc05248 100644 +--- 
a/include/uapi/linux/atmsap.h ++++ b/include/uapi/linux/atmsap.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* atmsap.h - ATM Service Access Point addressing definitions */ + + /* Written 1995-1999 by Werner Almesberger, EPFL LRC/ICA */ +diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h +index 0895a52..b9a6367 100644 +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or +@@ -16,7 +17,7 @@ + #define BPF_ALU64 0x07 /* alu mode in double word width */ + + /* ld/ldx fields */ +-#define BPF_DW 0x18 /* double word */ ++#define BPF_DW 0x18 /* double word (64-bit) */ + #define BPF_XADD 0xc0 /* exclusive add */ + + /* alu/jmp fields */ +@@ -92,6 +93,11 @@ enum bpf_cmd { + BPF_PROG_GET_FD_BY_ID, + BPF_MAP_GET_FD_BY_ID, + BPF_OBJ_GET_INFO_BY_FD, ++ BPF_PROG_QUERY, ++ BPF_RAW_TRACEPOINT_OPEN, ++ BPF_BTF_LOAD, ++ BPF_BTF_GET_FD_BY_ID, ++ BPF_TASK_FD_QUERY, + }; + + enum bpf_map_type { +@@ -111,6 +117,9 @@ enum bpf_map_type { + BPF_MAP_TYPE_HASH_OF_MAPS, + BPF_MAP_TYPE_DEVMAP, + BPF_MAP_TYPE_SOCKMAP, ++ BPF_MAP_TYPE_CPUMAP, ++ BPF_MAP_TYPE_XSKMAP, ++ BPF_MAP_TYPE_SOCKHASH, + }; + + enum bpf_prog_type { +@@ -129,6 +138,12 @@ enum bpf_prog_type { + BPF_PROG_TYPE_LWT_XMIT, + BPF_PROG_TYPE_SOCK_OPS, + BPF_PROG_TYPE_SK_SKB, ++ BPF_PROG_TYPE_CGROUP_DEVICE, ++ BPF_PROG_TYPE_SK_MSG, ++ BPF_PROG_TYPE_RAW_TRACEPOINT, ++ BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ++ BPF_PROG_TYPE_LWT_SEG6LOCAL, ++ BPF_PROG_TYPE_LIRC_MODE2, + }; + + enum bpf_attach_type { +@@ -138,16 +153,63 @@ enum bpf_attach_type { + BPF_CGROUP_SOCK_OPS, + BPF_SK_SKB_STREAM_PARSER, + BPF_SK_SKB_STREAM_VERDICT, ++ BPF_CGROUP_DEVICE, ++ BPF_SK_MSG_VERDICT, ++ BPF_CGROUP_INET4_BIND, ++ BPF_CGROUP_INET6_BIND, ++ BPF_CGROUP_INET4_CONNECT, ++ 
BPF_CGROUP_INET6_CONNECT, ++ BPF_CGROUP_INET4_POST_BIND, ++ BPF_CGROUP_INET6_POST_BIND, ++ BPF_CGROUP_UDP4_SENDMSG, ++ BPF_CGROUP_UDP6_SENDMSG, ++ BPF_LIRC_MODE2, + __MAX_BPF_ATTACH_TYPE + }; + + #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE + +-/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command +- * to the given target_fd cgroup the descendent cgroup will be able to +- * override effective bpf program that was inherited from this cgroup ++/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command ++ * ++ * NONE(default): No further bpf programs allowed in the subtree. ++ * ++ * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, ++ * the program in this cgroup yields to sub-cgroup program. ++ * ++ * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, ++ * that cgroup program gets run in addition to the program in this cgroup. ++ * ++ * Only one program is allowed to be attached to a cgroup with ++ * NONE or BPF_F_ALLOW_OVERRIDE flag. ++ * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will ++ * release old program and attach the new one. Attach flags has to match. ++ * ++ * Multiple programs are allowed to be attached to a cgroup with ++ * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order ++ * (those that were attached first, run first) ++ * The programs of sub-cgroup are executed first, then programs of ++ * this cgroup and then programs of parent cgroup. ++ * When children program makes decision (like picking TCP CA or sock bind) ++ * parent program has a chance to override it. ++ * ++ * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. ++ * A cgroup with NONE doesn't allow any programs in sub-cgroups. ++ * Ex1: ++ * cgrp1 (MULTI progs A, B) -> ++ * cgrp2 (OVERRIDE prog C) -> ++ * cgrp3 (MULTI prog D) -> ++ * cgrp4 (OVERRIDE prog E) -> ++ * cgrp5 (NONE prog F) ++ * the event in cgrp5 triggers execution of F,D,A,B in that order. 
++ * if prog F is detached, the execution is E,D,A,B ++ * if prog F and D are detached, the execution is E,A,B ++ * if prog F, E and D are detached, the execution is C,A,B ++ * ++ * All eligible programs are executed regardless of return code from ++ * earlier programs. + */ + #define BPF_F_ALLOW_OVERRIDE (1U << 0) ++#define BPF_F_ALLOW_MULTI (1U << 1) + + /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the + * verifier will perform strict alignment checking as if the kernel +@@ -156,8 +218,14 @@ enum bpf_attach_type { + */ + #define BPF_F_STRICT_ALIGNMENT (1U << 0) + ++/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ + #define BPF_PSEUDO_MAP_FD 1 + ++/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative ++ * offset to another bpf function ++ */ ++#define BPF_PSEUDO_CALL 1 ++ + /* flags for BPF_MAP_UPDATE_ELEM command */ + #define BPF_ANY 0 /* create new element or update existing */ + #define BPF_NOEXIST 1 /* create new element if it didn't exist */ +@@ -175,6 +243,37 @@ enum bpf_attach_type { + /* Specify numa node during map creation */ + #define BPF_F_NUMA_NODE (1U << 2) + ++/* flags for BPF_PROG_QUERY */ ++#define BPF_F_QUERY_EFFECTIVE (1U << 0) ++ ++#define BPF_OBJ_NAME_LEN 16U ++ ++/* Flags for accessing BPF object */ ++#define BPF_F_RDONLY (1U << 3) ++#define BPF_F_WRONLY (1U << 4) ++ ++/* Flag for stack_map, store build_id+offset instead of pointer */ ++#define BPF_F_STACK_BUILD_ID (1U << 5) ++ ++enum bpf_stack_build_id_status { ++ /* user space need an empty entry to identify end of a trace */ ++ BPF_STACK_BUILD_ID_EMPTY = 0, ++ /* with valid build_id and offset */ ++ BPF_STACK_BUILD_ID_VALID = 1, ++ /* couldn't get build_id, fallback to ip */ ++ BPF_STACK_BUILD_ID_IP = 2, ++}; ++ ++#define BPF_BUILD_ID_SIZE 20 ++struct bpf_stack_build_id { ++ __s32 status; ++ unsigned char build_id[BPF_BUILD_ID_SIZE]; ++ union { ++ __u64 offset; ++ __u64 ip; ++ }; ++}; ++ + union bpf_attr { + struct { /* 
anonymous struct used by BPF_MAP_CREATE command */ + __u32 map_type; /* one of enum bpf_map_type */ +@@ -188,6 +287,11 @@ union bpf_attr { + __u32 numa_node; /* numa node (effective only if + * BPF_F_NUMA_NODE is set). + */ ++ char map_name[BPF_OBJ_NAME_LEN]; ++ __u32 map_ifindex; /* ifindex of netdev to create on */ ++ __u32 btf_fd; /* fd pointing to a BTF type data */ ++ __u32 btf_key_type_id; /* BTF type_id of the key */ ++ __u32 btf_value_type_id; /* BTF type_id of the value */ + }; + + struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ +@@ -210,11 +314,19 @@ union bpf_attr { + __aligned_u64 log_buf; /* user supplied buffer */ + __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 prog_flags; ++ char prog_name[BPF_OBJ_NAME_LEN]; ++ __u32 prog_ifindex; /* ifindex of netdev to prep for */ ++ /* For some prog types expected attach type must be known at ++ * load time to verify attach type specific parts of prog ++ * (context accesses, allowed helpers, etc). ++ */ ++ __u32 expected_attach_type; + }; + + struct { /* anonymous struct used by BPF_OBJ_* commands */ + __aligned_u64 pathname; + __u32 bpf_fd; ++ __u32 file_flags; + }; + + struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ +@@ -240,8 +352,10 @@ union bpf_attr { + __u32 start_id; + __u32 prog_id; + __u32 map_id; ++ __u32 btf_id; + }; + __u32 next_id; ++ __u32 open_flags; + }; + + struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ +@@ -249,339 +363,1718 @@ union bpf_attr { + __u32 info_len; + __aligned_u64 info; + } info; ++ ++ struct { /* anonymous struct used by BPF_PROG_QUERY command */ ++ __u32 target_fd; /* container object to query */ ++ __u32 attach_type; ++ __u32 query_flags; ++ __u32 attach_flags; ++ __aligned_u64 prog_ids; ++ __u32 prog_cnt; ++ } query; ++ ++ struct { ++ __u64 name; ++ __u32 prog_fd; ++ } raw_tracepoint; ++ ++ struct { /* anonymous struct for BPF_BTF_LOAD */ ++ __aligned_u64 btf; ++ __aligned_u64 btf_log_buf; ++ __u32 
btf_size; ++ __u32 btf_log_size; ++ __u32 btf_log_level; ++ }; ++ ++ struct { ++ __u32 pid; /* input: pid */ ++ __u32 fd; /* input: fd */ ++ __u32 flags; /* input: flags */ ++ __u32 buf_len; /* input/output: buf len */ ++ __aligned_u64 buf; /* input/output: ++ * tp_name for tracepoint ++ * symbol for kprobe ++ * filename for uprobe ++ */ ++ __u32 prog_id; /* output: prod_id */ ++ __u32 fd_type; /* output: BPF_FD_TYPE_* */ ++ __u64 probe_offset; /* output: probe_offset */ ++ __u64 probe_addr; /* output: probe_addr */ ++ } task_fd_query; + } __attribute__((aligned(8))); + +-/* BPF helper function descriptions: ++/* The description below is an attempt at providing documentation to eBPF ++ * developers about the multiple available eBPF helper functions. It can be ++ * parsed and used to produce a manual page. The workflow is the following, ++ * and requires the rst2man utility: ++ * ++ * $ ./scripts/bpf_helpers_doc.py \ ++ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst ++ * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 ++ * $ man /tmp/bpf-helpers.7 ++ * ++ * Note that in order to produce this external documentation, some RST ++ * formatting is used in the descriptions to get "bold" and "italics" in ++ * manual pages. Also note that the few trailing white spaces are ++ * intentional, removing them would break paragraphs for rst2man. ++ * ++ * Start of BPF helper function descriptions: ++ * ++ * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) ++ * Description ++ * Perform a lookup in *map* for an entry associated to *key*. ++ * Return ++ * Map value associated to *key*, or **NULL** if no entry was ++ * found. + * +- * void *bpf_map_lookup_elem(&map, &key) +- * Return: Map value or NULL ++ * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) ++ * Description ++ * Add or update the value of the entry associated to *key* in ++ * *map* with *value*. 
*flags* is one of: + * +- * int bpf_map_update_elem(&map, &key, &value, flags) +- * Return: 0 on success or negative error ++ * **BPF_NOEXIST** ++ * The entry for *key* must not exist in the map. ++ * **BPF_EXIST** ++ * The entry for *key* must already exist in the map. ++ * **BPF_ANY** ++ * No condition on the existence of the entry for *key*. + * +- * int bpf_map_delete_elem(&map, &key) +- * Return: 0 on success or negative error ++ * Flag value **BPF_NOEXIST** cannot be used for maps of types ++ * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all ++ * elements always exist), the helper would return an error. ++ * Return ++ * 0 on success, or a negative error in case of failure. + * +- * int bpf_probe_read(void *dst, int size, void *src) +- * Return: 0 on success or negative error ++ * int bpf_map_delete_elem(struct bpf_map *map, const void *key) ++ * Description ++ * Delete entry with *key* from *map*. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_probe_read(void *dst, u32 size, const void *src) ++ * Description ++ * For tracing programs, safely attempt to read *size* bytes from ++ * address *src* and store the data in *dst*. ++ * Return ++ * 0 on success, or a negative error in case of failure. + * + * u64 bpf_ktime_get_ns(void) +- * Return: current ktime +- * +- * int bpf_trace_printk(const char *fmt, int fmt_size, ...) 
+- * Return: length of buffer written or negative error +- * +- * u32 bpf_prandom_u32(void) +- * Return: random value +- * +- * u32 bpf_raw_smp_processor_id(void) +- * Return: SMP processor ID +- * +- * int bpf_skb_store_bytes(skb, offset, from, len, flags) +- * store bytes into packet +- * @skb: pointer to skb +- * @offset: offset within packet from skb->mac_header +- * @from: pointer where to copy bytes from +- * @len: number of bytes to store into packet +- * @flags: bit 0 - if true, recompute skb->csum +- * other bits - reserved +- * Return: 0 on success or negative error +- * +- * int bpf_l3_csum_replace(skb, offset, from, to, flags) +- * recompute IP checksum +- * @skb: pointer to skb +- * @offset: offset within packet where IP checksum is located +- * @from: old value of header field +- * @to: new value of header field +- * @flags: bits 0-3 - size of header field +- * other bits - reserved +- * Return: 0 on success or negative error +- * +- * int bpf_l4_csum_replace(skb, offset, from, to, flags) +- * recompute TCP/UDP checksum +- * @skb: pointer to skb +- * @offset: offset within packet where TCP/UDP checksum is located +- * @from: old value of header field +- * @to: new value of header field +- * @flags: bits 0-3 - size of header field +- * bit 4 - is pseudo header +- * other bits - reserved +- * Return: 0 on success or negative error +- * +- * int bpf_tail_call(ctx, prog_array_map, index) +- * jump into another BPF program +- * @ctx: context pointer passed to next program +- * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY +- * @index: index inside array that selects specific program to run +- * Return: 0 on success or negative error +- * +- * int bpf_clone_redirect(skb, ifindex, flags) +- * redirect to another netdev +- * @skb: pointer to skb +- * @ifindex: ifindex of the net device +- * @flags: bit 0 - if set, redirect to ingress instead of egress +- * other bits - reserved +- * Return: 0 on success or negative error ++ * 
Description ++ * Return the time elapsed since system boot, in nanoseconds. ++ * Return ++ * Current *ktime*. ++ * ++ * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...) ++ * Description ++ * This helper is a "printk()-like" facility for debugging. It ++ * prints a message defined by format *fmt* (of size *fmt_size*) ++ * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if ++ * available. It can take up to three additional **u64** ++ * arguments (as an eBPF helpers, the total number of arguments is ++ * limited to five). ++ * ++ * Each time the helper is called, it appends a line to the trace. ++ * The format of the trace is customizable, and the exact output ++ * one will get depends on the options set in ++ * *\/sys/kernel/debug/tracing/trace_options* (see also the ++ * *README* file under the same directory). However, it usually ++ * defaults to something like: ++ * ++ * :: ++ * ++ * telnet-470 [001] .N.. 419421.045894: 0x00000001: ++ * ++ * In the above: ++ * ++ * * ``telnet`` is the name of the current task. ++ * * ``470`` is the PID of the current task. ++ * * ``001`` is the CPU number on which the task is ++ * running. ++ * * In ``.N..``, each character refers to a set of ++ * options (whether irqs are enabled, scheduling ++ * options, whether hard/softirqs are running, level of ++ * preempt_disabled respectively). **N** means that ++ * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED** ++ * are set. ++ * * ``419421.045894`` is a timestamp. ++ * * ``0x00000001`` is a fake value used by BPF for the ++ * instruction pointer register. ++ * * ```` is the message formatted with ++ * *fmt*. ++ * ++ * The conversion specifiers supported by *fmt* are similar, but ++ * more limited than for printk(). They are **%d**, **%i**, ++ * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**, ++ * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size ++ * of field, padding with zeroes, etc.) 
is available, and the ++ * helper will return **-EINVAL** (but print nothing) if it ++ * encounters an unknown specifier. ++ * ++ * Also, note that **bpf_trace_printk**\ () is slow, and should ++ * only be used for debugging purposes. For this reason, a notice ++ * bloc (spanning several lines) is printed to kernel logs and ++ * states that the helper should not be used "for production use" ++ * the first time this helper is used (or more precisely, when ++ * **trace_printk**\ () buffers are allocated). For passing values ++ * to user space, perf events should be preferred. ++ * Return ++ * The number of bytes written to the buffer, or a negative error ++ * in case of failure. ++ * ++ * u32 bpf_get_prandom_u32(void) ++ * Description ++ * Get a pseudo-random number. ++ * ++ * From a security point of view, this helper uses its own ++ * pseudo-random internal state, and cannot be used to infer the ++ * seed of other random functions in the kernel. However, it is ++ * essential to note that the generator used by the helper is not ++ * cryptographically secure. ++ * Return ++ * A random 32-bit unsigned value. ++ * ++ * u32 bpf_get_smp_processor_id(void) ++ * Description ++ * Get the SMP (symmetric multiprocessing) processor id. Note that ++ * all programs run with preemption disabled, which means that the ++ * SMP processor id is stable during all the execution of the ++ * program. ++ * Return ++ * The SMP id of the processor running the program. ++ * ++ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) ++ * Description ++ * Store *len* bytes from address *from* into the packet ++ * associated to *skb*, at *offset*. *flags* are a combination of ++ * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the ++ * checksum for the packet after storing the bytes) and ++ * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ ++ * **->swhash** and *skb*\ **->l4hash** to 0). 
++ * ++ * A call to this helper is susceptible to change the underlaying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) ++ * Description ++ * Recompute the layer 3 (e.g. IP) checksum for the packet ++ * associated to *skb*. Computation is incremental, so the helper ++ * must know the former value of the header field that was ++ * modified (*from*), the new value of this field (*to*), and the ++ * number of bytes (2 or 4) for this field, stored in *size*. ++ * Alternatively, it is possible to store the difference between ++ * the previous and the new values of the header field in *to*, by ++ * setting *from* and *size* to 0. For both methods, *offset* ++ * indicates the location of the IP checksum within the packet. ++ * ++ * This helper works in combination with **bpf_csum_diff**\ (), ++ * which does not update the checksum in-place, but offers more ++ * flexibility and can handle sizes larger than 2 or 4 for the ++ * checksum to update. ++ * ++ * A call to this helper is susceptible to change the underlaying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) ++ * Description ++ * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the ++ * packet associated to *skb*. 
Computation is incremental, so the ++ * helper must know the former value of the header field that was ++ * modified (*from*), the new value of this field (*to*), and the ++ * number of bytes (2 or 4) for this field, stored on the lowest ++ * four bits of *flags*. Alternatively, it is possible to store ++ * the difference between the previous and the new values of the ++ * header field in *to*, by setting *from* and the four lowest ++ * bits of *flags* to 0. For both methods, *offset* indicates the ++ * location of the IP checksum within the packet. In addition to ++ * the size of the field, *flags* can be added (bitwise OR) actual ++ * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left ++ * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and ++ * for updates resulting in a null checksum the value is set to ++ * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates ++ * the checksum is to be computed against a pseudo-header. ++ * ++ * This helper works in combination with **bpf_csum_diff**\ (), ++ * which does not update the checksum in-place, but offers more ++ * flexibility and can handle sizes larger than 2 or 4 for the ++ * checksum to update. ++ * ++ * A call to this helper is susceptible to change the underlaying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) ++ * Description ++ * This special helper is used to trigger a "tail call", or in ++ * other words, to jump into another eBPF program. The same stack ++ * frame is used (but values on stack and in registers for the ++ * caller are not accessible to the callee). 
This mechanism allows ++ * for program chaining, either for raising the maximum number of ++ * available eBPF instructions, or to execute given programs in ++ * conditional blocks. For security reasons, there is an upper ++ * limit to the number of successive tail calls that can be ++ * performed. ++ * ++ * Upon call of this helper, the program attempts to jump into a ++ * program referenced at index *index* in *prog_array_map*, a ++ * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes ++ * *ctx*, a pointer to the context. ++ * ++ * If the call succeeds, the kernel immediately runs the first ++ * instruction of the new program. This is not a function call, ++ * and it never returns to the previous program. If the call ++ * fails, then the helper has no effect, and the caller continues ++ * to run its subsequent instructions. A call can fail if the ++ * destination program for the jump does not exist (i.e. *index* ++ * is superior to the number of entries in *prog_array_map*), or ++ * if the maximum number of tail calls has been reached for this ++ * chain of programs. This limit is defined in the kernel by the ++ * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), ++ * which is currently set to 32. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) ++ * Description ++ * Clone and redirect the packet associated to *skb* to another ++ * net device of index *ifindex*. Both ingress and egress ++ * interfaces can be used for redirection. The **BPF_F_INGRESS** ++ * value in *flags* is used to make the distinction (ingress path ++ * is selected if the flag is present, egress path otherwise). ++ * This is the only flag supported for now. ++ * ++ * In comparison with **bpf_redirect**\ () helper, ++ * **bpf_clone_redirect**\ () has the associated cost of ++ * duplicating the packet buffer, but this can be executed out of ++ * the eBPF program. 
Conversely, **bpf_redirect**\ () is more ++ * efficient, but it is handled through an action code where the ++ * redirection happens only after the eBPF program has returned. ++ * ++ * A call to this helper is susceptible to change the underlaying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. + * + * u64 bpf_get_current_pid_tgid(void) +- * Return: current->tgid << 32 | current->pid ++ * Return ++ * A 64-bit integer containing the current tgid and pid, and ++ * created as such: ++ * *current_task*\ **->tgid << 32 \|** ++ * *current_task*\ **->pid**. + * + * u64 bpf_get_current_uid_gid(void) +- * Return: current_gid << 32 | current_uid +- * +- * int bpf_get_current_comm(char *buf, int size_of_buf) +- * stores current->comm into buf +- * Return: 0 on success or negative error +- * +- * u32 bpf_get_cgroup_classid(skb) +- * retrieve a proc's classid +- * @skb: pointer to skb +- * Return: classid if != 0 +- * +- * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) +- * Return: 0 on success or negative error +- * +- * int bpf_skb_vlan_pop(skb) +- * Return: 0 on success or negative error +- * +- * int bpf_skb_get_tunnel_key(skb, key, size, flags) +- * int bpf_skb_set_tunnel_key(skb, key, size, flags) +- * retrieve or populate tunnel metadata +- * @skb: pointer to skb +- * @key: pointer to 'struct bpf_tunnel_key' +- * @size: size of 'struct bpf_tunnel_key' +- * @flags: room for future extensions +- * Return: 0 on success or negative error +- * +- * u64 bpf_perf_event_read(map, flags) +- * read perf event counter value +- * @map: pointer to perf_event_array map +- * @flags: index of event in the map or bitmask flags +- * Return: value of perf event counter read or error code +- * +- * int bpf_redirect(ifindex, flags) +- * redirect 
to another netdev +- * @ifindex: ifindex of the net device +- * @flags: +- * cls_bpf: +- * bit 0 - if set, redirect to ingress instead of egress +- * other bits - reserved +- * xdp_bpf: +- * all bits - reserved +- * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error +- * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error +- * int bpf_redirect_map(map, key, flags) +- * redirect to endpoint in map +- * @map: pointer to dev map +- * @key: index in map to lookup +- * @flags: -- +- * Return: XDP_REDIRECT on success or XDP_ABORT on error +- * +- * u32 bpf_get_route_realm(skb) +- * retrieve a dst's tclassid +- * @skb: pointer to skb +- * Return: realm if != 0 +- * +- * int bpf_perf_event_output(ctx, map, flags, data, size) +- * output perf raw sample +- * @ctx: struct pt_regs* +- * @map: pointer to perf_event_array map +- * @flags: index of event in the map or bitmask flags +- * @data: data on stack to be output as raw data +- * @size: size of data +- * Return: 0 on success or negative error +- * +- * int bpf_get_stackid(ctx, map, flags) +- * walk user or kernel stack and return id +- * @ctx: struct pt_regs* +- * @map: pointer to stack_trace map +- * @flags: bits 0-7 - numer of stack frames to skip +- * bit 8 - collect user stack instead of kernel +- * bit 9 - compare stacks by hash only +- * bit 10 - if two different stacks hash into the same stackid +- * discard old +- * other bits - reserved +- * Return: >= 0 stackid on success or negative error +- * +- * s64 bpf_csum_diff(from, from_size, to, to_size, seed) +- * calculate csum diff +- * @from: raw from buffer +- * @from_size: length of from buffer +- * @to: raw to buffer +- * @to_size: length of to buffer +- * @seed: optional seed +- * Return: csum result or negative error code +- * +- * int bpf_skb_get_tunnel_opt(skb, opt, size) +- * retrieve tunnel options metadata +- * @skb: pointer to skb +- * @opt: pointer to raw tunnel option data +- * @size: size of @opt +- * Return: option size +- * +- * 
int bpf_skb_set_tunnel_opt(skb, opt, size) +- * populate tunnel options metadata +- * @skb: pointer to skb +- * @opt: pointer to raw tunnel option data +- * @size: size of @opt +- * Return: 0 on success or negative error +- * +- * int bpf_skb_change_proto(skb, proto, flags) +- * Change protocol of the skb. Currently supported is v4 -> v6, +- * v6 -> v4 transitions. The helper will also resize the skb. eBPF +- * program is expected to fill the new headers via skb_store_bytes +- * and lX_csum_replace. +- * @skb: pointer to skb +- * @proto: new skb->protocol type +- * @flags: reserved +- * Return: 0 on success or negative error +- * +- * int bpf_skb_change_type(skb, type) +- * Change packet type of skb. +- * @skb: pointer to skb +- * @type: new skb->pkt_type type +- * Return: 0 on success or negative error +- * +- * int bpf_skb_under_cgroup(skb, map, index) +- * Check cgroup2 membership of skb +- * @skb: pointer to skb +- * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type +- * @index: index of the cgroup in the bpf_map +- * Return: +- * == 0 skb failed the cgroup2 descendant test +- * == 1 skb succeeded the cgroup2 descendant test +- * < 0 error +- * +- * u32 bpf_get_hash_recalc(skb) +- * Retrieve and possibly recalculate skb->hash. +- * @skb: pointer to skb +- * Return: hash ++ * Return ++ * A 64-bit integer containing the current GID and UID, and ++ * created as such: *current_gid* **<< 32 \|** *current_uid*. ++ * ++ * int bpf_get_current_comm(char *buf, u32 size_of_buf) ++ * Description ++ * Copy the **comm** attribute of the current task into *buf* of ++ * *size_of_buf*. The **comm** attribute contains the name of ++ * the executable (excluding the path) for the current task. The ++ * *size_of_buf* must be strictly positive. On success, the ++ * helper makes sure that the *buf* is NUL-terminated. On failure, ++ * it is filled with zeroes. ++ * Return ++ * 0 on success, or a negative error in case of failure. 
++ * ++ * u32 bpf_get_cgroup_classid(struct sk_buff *skb) ++ * Description ++ * Retrieve the classid for the current task, i.e. for the net_cls ++ * cgroup to which *skb* belongs. ++ * ++ * This helper can be used on TC egress path, but not on ingress. ++ * ++ * The net_cls cgroup provides an interface to tag network packets ++ * based on a user-provided identifier for all traffic coming from ++ * the tasks belonging to the related cgroup. See also the related ++ * kernel documentation, available from the Linux sources in file ++ * *Documentation/cgroup-v1/net_cls.txt*. ++ * ++ * The Linux kernel has two versions for cgroups: there are ++ * cgroups v1 and cgroups v2. Both are available to users, who can ++ * use a mixture of them, but note that the net_cls cgroup is for ++ * cgroup v1 only. This makes it incompatible with BPF programs ++ * run on cgroups, which is a cgroup-v2-only feature (a socket can ++ * only hold data for one version of cgroups at a time). ++ * ++ * This helper is only available if the kernel was compiled with ++ * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to ++ * "**y**" or to "**m**". ++ * Return ++ * The classid, or 0 for the default unconfigured classid. ++ * ++ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) ++ * Description ++ * Push a *vlan_tci* (VLAN tag control information) of protocol ++ * *vlan_proto* to the packet associated to *skb*, then update ++ * the checksum. Note that if *vlan_proto* is different from ++ * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to ++ * be **ETH_P_8021Q**. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure.
++ * ++ * int bpf_skb_vlan_pop(struct sk_buff *skb) ++ * Description ++ * Pop a VLAN header from the packet associated to *skb*. ++ * ++ * A call to this helper is susceptible to change the underlaying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) ++ * Description ++ * Get tunnel metadata. This helper takes a pointer *key* to an ++ * empty **struct bpf_tunnel_key** of **size**, that will be ++ * filled with tunnel metadata for the packet associated to *skb*. ++ * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which ++ * indicates that the tunnel is based on IPv6 protocol instead of ++ * IPv4. ++ * ++ * The **struct bpf_tunnel_key** is an object that generalizes the ++ * principal parameters used by various tunneling protocols into a ++ * single struct. This way, it can be used to easily make a ++ * decision based on the contents of the encapsulation header, ++ * "summarized" in this struct. In particular, it holds the IP ++ * address of the remote end (IPv4 or IPv6, depending on the case) ++ * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also, ++ * this struct exposes the *key*\ **->tunnel_id**, which is ++ * generally mapped to a VNI (Virtual Network Identifier), making ++ * it programmable together with the **bpf_skb_set_tunnel_key**\ ++ * () helper. 
++ * ++ * Let's imagine that the following code is part of a program ++ * attached to the TC ingress interface, on one end of a GRE ++ * tunnel, and is supposed to filter out all messages coming from ++ * remote ends with IPv4 address other than 10.0.0.1: ++ * ++ * :: ++ * ++ * int ret; ++ * struct bpf_tunnel_key key = {}; ++ * ++ * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); ++ * if (ret < 0) ++ * return TC_ACT_SHOT; // drop packet ++ * ++ * if (key.remote_ipv4 != 0x0a000001) ++ * return TC_ACT_SHOT; // drop packet ++ * ++ * return TC_ACT_OK; // accept packet ++ * ++ * This interface can also be used with all encapsulation devices ++ * that can operate in "collect metadata" mode: instead of having ++ * one network device per specific configuration, the "collect ++ * metadata" mode only requires a single device where the ++ * configuration can be extracted from this helper. ++ * ++ * This can be used together with various tunnels such as VXLAN, ++ * Geneve, GRE or IP in IP (IPIP). ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) ++ * Description ++ * Populate tunnel metadata for packet associated to *skb*. The ++ * tunnel metadata is set to the contents of *key*, of *size*. The ++ * *flags* can be set to a combination of the following values: ++ * ++ * **BPF_F_TUNINFO_IPV6** ++ * Indicate that the tunnel is based on IPv6 protocol ++ * instead of IPv4. ++ * **BPF_F_ZERO_CSUM_TX** ++ * For IPv4 packets, add a flag to tunnel metadata ++ * indicating that checksum computation should be skipped ++ * and checksum set to zeroes. ++ * **BPF_F_DONT_FRAGMENT** ++ * Add a flag to tunnel metadata indicating that the ++ * packet should not be fragmented. ++ * **BPF_F_SEQ_NUMBER** ++ * Add a flag to tunnel metadata indicating that a ++ * sequence number should be added to tunnel header before ++ * sending the packet.
This flag was added for GRE ++ * encapsulation, but might be used with other protocols ++ * as well in the future. ++ * ++ * Here is a typical usage on the transmit path: ++ * ++ * :: ++ * ++ * struct bpf_tunnel_key key; ++ * populate key ... ++ * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); ++ * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0); ++ * ++ * See also the description of the **bpf_skb_get_tunnel_key**\ () ++ * helper for additional information. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags) ++ * Description ++ * Read the value of a perf event counter. This helper relies on a ++ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of ++ * the perf event counter is selected when *map* is updated with ++ * perf event file descriptors. The *map* is an array whose size ++ * is the number of available CPUs, and each cell contains a value ++ * relative to one CPU. The value to retrieve is indicated by ++ * *flags*, that contains the index of the CPU to look up, masked ++ * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to ++ * **BPF_F_CURRENT_CPU** to indicate that the value for the ++ * current CPU should be retrieved. ++ * ++ * Note that before Linux 4.13, only hardware perf event can be ++ * retrieved. ++ * ++ * Also, be aware that the newer helper ++ * **bpf_perf_event_read_value**\ () is recommended over ++ * **bpf_perf_event_read**\ () in general. The latter has some ABI ++ * quirks where error and counter value are used as a return code ++ * (which is wrong to do since ranges may overlap). This issue is ++ * fixed with **bpf_perf_event_read_value**\ (), which at the same ++ * time provides more features over the **bpf_perf_event_read**\ ++ * () interface. Please refer to the description of ++ * **bpf_perf_event_read_value**\ () for details. 
++ * Return ++ * The value of the perf event counter read from the map, or a ++ * negative error code in case of failure. ++ * ++ * int bpf_redirect(u32 ifindex, u64 flags) ++ * Description ++ * Redirect the packet to another net device of index *ifindex*. ++ * This helper is somewhat similar to **bpf_clone_redirect**\ ++ * (), except that the packet is not cloned, which provides ++ * increased performance. ++ * ++ * Except for XDP, both ingress and egress interfaces can be used ++ * for redirection. The **BPF_F_INGRESS** value in *flags* is used ++ * to make the distinction (ingress path is selected if the flag ++ * is present, egress path otherwise). Currently, XDP only ++ * supports redirection to the egress interface, and accepts no ++ * flag at all. ++ * ++ * The same effect can be attained with the more generic ++ * **bpf_redirect_map**\ (), which requires specific maps to be ++ * used but offers better performance. ++ * Return ++ * For XDP, the helper returns **XDP_REDIRECT** on success or ++ * **XDP_ABORTED** on error. For other program types, the values ++ * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on ++ * error. ++ * ++ * u32 bpf_get_route_realm(struct sk_buff *skb) ++ * Description ++ * Retrieve the realm of the route, that is to say the ++ * **tclassid** field of the destination for the *skb*. The ++ * identifier retrieved is a user-provided tag, similar to the ++ * one used with the net_cls cgroup (see description for ++ * **bpf_get_cgroup_classid**\ () helper), but here this tag is ++ * held by a route (a destination entry), not by a task. ++ * ++ * Retrieving this identifier works with the clsact TC egress hook ++ * (see also **tc-bpf(8)**), or alternatively on conventional ++ * classful egress qdiscs, but not on TC ingress path. In case of ++ * clsact TC egress hook, this has the advantage that, internally, ++ * the destination entry has not been dropped yet in the transmit ++ * path.
Therefore, the destination entry does not need to be ++ * artificially held via **netif_keep_dst**\ () for a classful ++ * qdisc until the *skb* is freed. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_IP_ROUTE_CLASSID** configuration option. ++ * Return ++ * The realm of the route for the packet associated to *skb*, or 0 ++ * if none was found. ++ * ++ * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) ++ * Description ++ * Write raw *data* blob into a special BPF perf event held by ++ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf ++ * event must have the following attributes: **PERF_SAMPLE_RAW** ++ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and ++ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. ++ * ++ * The *flags* are used to indicate the index in *map* for which ++ * the value must be put, masked with **BPF_F_INDEX_MASK**. ++ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** ++ * to indicate that the index of the current CPU core should be ++ * used. ++ * ++ * The value to write, of *size*, is passed through eBPF stack and ++ * pointed by *data*. ++ * ++ * The context of the program *ctx* also needs to be passed to the ++ * helper. ++ * ++ * On user space, a program willing to read the values needs to ++ * call **perf_event_open**\ () on the perf event (either for ++ * one or for all CPUs) and to store the file descriptor into the ++ * *map*. This must be done before the eBPF program can send data ++ * into it. An example is available in file ++ * *samples/bpf/trace_output_user.c* in the Linux kernel source ++ * tree (the eBPF program counterpart is in ++ * *samples/bpf/trace_output_kern.c*). ++ * ++ * **bpf_perf_event_output**\ () achieves better performance ++ * than **bpf_trace_printk**\ () for sharing data with user ++ * space, and is much better suited for streaming data from eBPF ++ * programs.
++ * ++ * Note that this helper is not restricted to tracing use cases ++ * and can be used with programs attached to TC or XDP as well, ++ * where it allows for passing data to user space listeners. Data ++ * can be: ++ * ++ * * Only custom structs, ++ * * Only the packet payload, or ++ * * A combination of both. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) ++ * Description ++ * This helper was provided as an easy way to load data from a ++ * packet. It can be used to load *len* bytes from *offset* from ++ * the packet associated to *skb*, into the buffer pointed by ++ * *to*. ++ * ++ * Since Linux 4.7, usage of this helper has mostly been replaced ++ * by "direct packet access", enabling packet data to be ++ * manipulated with *skb*\ **->data** and *skb*\ **->data_end** ++ * pointing respectively to the first byte of packet data and to ++ * the byte after the last byte of packet data. However, it ++ * remains useful if one wishes to read large quantities of data ++ * at once from a packet into the eBPF stack. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags) ++ * Description ++ * Walk a user or a kernel stack and return its id. To achieve ++ * this, the helper needs *ctx*, which is a pointer to the context ++ * on which the tracing program is executed, and a pointer to a ++ * *map* of type **BPF_MAP_TYPE_STACK_TRACE**. ++ * ++ * The last argument, *flags*, holds the number of stack frames to ++ * skip (from 0 to 255), masked with ++ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set ++ * a combination of the following flags: ++ * ++ * **BPF_F_USER_STACK** ++ * Collect a user space stack instead of a kernel stack. ++ * **BPF_F_FAST_STACK_CMP** ++ * Compare stacks by hash only.
++ * **BPF_F_REUSE_STACKID** ++ * If two different stacks hash into the same *stackid*, ++ * discard the old one. ++ * ++ * The stack id retrieved is a 32 bit long integer handle which ++ * can be further combined with other data (including other stack ++ * ids) and used as a key into maps. This can be useful for ++ * generating a variety of graphs (such as flame graphs or off-cpu ++ * graphs). ++ * ++ * For walking a stack, this helper is an improvement over ++ * **bpf_probe_read**\ (), which can be used with unrolled loops ++ * but is not efficient and consumes a lot of eBPF instructions. ++ * Instead, **bpf_get_stackid**\ () can collect up to ++ * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that ++ * this limit can be controlled with the **sysctl** program, and ++ * that it should be manually increased in order to profile long ++ * user stacks (such as stacks for Java programs). To do so, use: ++ * ++ * :: ++ * ++ * # sysctl kernel.perf_event_max_stack=<new value> ++ * Return ++ * The positive or null stack id on success, or a negative error ++ * in case of failure. ++ * ++ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed) ++ * Description ++ * Compute a checksum difference, from the raw buffer pointed by ++ * *from*, of length *from_size* (that must be a multiple of 4), ++ * towards the raw buffer pointed by *to*, of size *to_size* ++ * (same remark). An optional *seed* can be added to the value ++ * (this can be cascaded, the seed may come from a previous call ++ * to the helper). ++ * ++ * This is flexible enough to be used in several ways: ++ * ++ * * With *from_size* == 0, *to_size* > 0 and *seed* set to ++ * checksum, it can be used when pushing new data. ++ * * With *from_size* > 0, *to_size* == 0 and *seed* set to ++ * checksum, it can be used when removing data from a packet. ++ * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it ++ * can be used to compute a diff.
Note that *from_size* and ++ * *to_size* do not need to be equal. ++ * ++ * This helper can be used in combination with ++ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to ++ * which one can feed in the difference computed with ++ * **bpf_csum_diff**\ (). ++ * Return ++ * The checksum result, or a negative error code in case of ++ * failure. ++ * ++ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size) ++ * Description ++ * Retrieve tunnel options metadata for the packet associated to ++ * *skb*, and store the raw tunnel option data to the buffer *opt* ++ * of *size*. ++ * ++ * This helper can be used with encapsulation devices that can ++ * operate in "collect metadata" mode (please refer to the related ++ * note in the description of **bpf_skb_get_tunnel_key**\ () for ++ * more details). A particular example where this can be used is ++ * in combination with the Geneve encapsulation protocol, where it ++ * allows for pushing (with **bpf_skb_set_tunnel_opt**\ () helper) ++ * and retrieving arbitrary TLVs (Type-Length-Value headers) from ++ * the eBPF program. This allows for full customization of these ++ * headers. ++ * Return ++ * The size of the option data retrieved. ++ * ++ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size) ++ * Description ++ * Set tunnel options metadata for the packet associated to *skb* ++ * to the option data contained in the raw buffer *opt* of *size*. ++ * ++ * See also the description of the **bpf_skb_get_tunnel_opt**\ () ++ * helper for additional information. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) ++ * Description ++ * Change the protocol of the *skb* to *proto*. Currently ++ * supported are transition from IPv4 to IPv6, and from IPv6 to ++ * IPv4. The helper takes care of the groundwork for the ++ * transition, including resizing the socket buffer.
The eBPF ++ * program is expected to fill the new headers, if any, via ++ * **bpf_skb_store_bytes**\ () and to recompute the checksums with ++ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ ++ * (). The main case for this helper is to perform NAT64 ++ * operations out of an eBPF program. ++ * ++ * Internally, the GSO type is marked as dodgy so that headers are ++ * checked and segments are recalculated by the GSO/GRO engine. ++ * The size for GSO target is adapted as well. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_change_type(struct sk_buff *skb, u32 type) ++ * Description ++ * Change the packet type for the packet associated to *skb*. This ++ * comes down to setting *skb*\ **->pkt_type** to *type*, except ++ * the eBPF program does not have a write access to *skb*\ ++ * **->pkt_type** beside this helper. Using a helper here allows ++ * for graceful handling of errors. ++ * ++ * The major use case is to change incoming *skb*s to ++ * **PACKET_HOST** in a programmatic way instead of having to ++ * recirculate via **bpf_redirect**\ (..., **BPF_F_INGRESS**), for ++ * example. ++ * ++ * Note that *type* only allows certain values. At this time, they ++ * are: ++ * ++ * **PACKET_HOST** ++ * Packet is for us. ++ * **PACKET_BROADCAST** ++ * Send packet to all. ++ * **PACKET_MULTICAST** ++ * Send packet to group. ++ * **PACKET_OTHERHOST** ++ * Send packet to someone else. ++ * Return ++ * 0 on success, or a negative error in case of failure.
++ * ++ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) ++ * Description ++ * Check whether *skb* is a descendant of the cgroup2 held by ++ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. ++ * Return ++ * The return value depends on the result of the test, and can be: ++ * ++ * * 0, if the *skb* failed the cgroup2 descendant test. ++ * * 1, if the *skb* succeeded the cgroup2 descendant test. ++ * * A negative error code, if an error occurred. ++ * ++ * u32 bpf_get_hash_recalc(struct sk_buff *skb) ++ * Description ++ * Retrieve the hash of the packet, *skb*\ **->hash**. If it is ++ * not set, in particular if the hash was cleared due to mangling, ++ * recompute this hash. Later accesses to the hash can be done ++ * directly with *skb*\ **->hash**. ++ * ++ * Calling **bpf_set_hash_invalid**\ (), changing a packet ++ * prototype with **bpf_skb_change_proto**\ (), or calling ++ * **bpf_skb_store_bytes**\ () with the ++ * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear ++ * the hash and to trigger a new computation for the next call to ++ * **bpf_get_hash_recalc**\ (). ++ * Return ++ * The 32-bit hash. 
+ * + * u64 bpf_get_current_task(void) +- * Returns current task_struct +- * Return: current +- * +- * int bpf_probe_write_user(void *dst, void *src, int len) +- * safely attempt to write to a location +- * @dst: destination address in userspace +- * @src: source address on stack +- * @len: number of bytes to copy +- * Return: 0 on success or negative error +- * +- * int bpf_current_task_under_cgroup(map, index) +- * Check cgroup2 membership of current task +- * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type +- * @index: index of the cgroup in the bpf_map +- * Return: +- * == 0 current failed the cgroup2 descendant test +- * == 1 current succeeded the cgroup2 descendant test +- * < 0 error +- * +- * int bpf_skb_change_tail(skb, len, flags) +- * The helper will resize the skb to the given new size, to be used f.e. +- * with control messages. +- * @skb: pointer to skb +- * @len: new skb length +- * @flags: reserved +- * Return: 0 on success or negative error +- * +- * int bpf_skb_pull_data(skb, len) +- * The helper will pull in non-linear data in case the skb is non-linear +- * and not all of len are part of the linear section. Only needed for +- * read/write with direct packet access. +- * @skb: pointer to skb +- * @len: len to make read/writeable +- * Return: 0 on success or negative error +- * +- * s64 bpf_csum_update(skb, csum) +- * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. +- * @skb: pointer to skb +- * @csum: csum to add +- * Return: csum on success or negative error +- * +- * void bpf_set_hash_invalid(skb) +- * Invalidate current skb->hash. +- * @skb: pointer to skb +- * +- * int bpf_get_numa_node_id() +- * Return: Id of current NUMA node. +- * +- * int bpf_skb_change_head() +- * Grows headroom of skb and adjusts MAC header offset accordingly. +- * Will extends/reallocae as required automatically. +- * May change skb data pointer and will thus invalidate any check +- * performed for direct packet access. 
+- * @skb: pointer to skb +- * @len: length of header to be pushed in front +- * @flags: Flags (unused for now) +- * Return: 0 on success or negative error +- * +- * int bpf_xdp_adjust_head(xdp_md, delta) +- * Adjust the xdp_md.data by delta +- * @xdp_md: pointer to xdp_md +- * @delta: An positive/negative integer to be added to xdp_md.data +- * Return: 0 on success or negative on error ++ * Return ++ * A pointer to the current task struct. ++ * ++ * int bpf_probe_write_user(void *dst, const void *src, u32 len) ++ * Description ++ * Attempt in a safe way to write *len* bytes from the buffer ++ * *src* to *dst* in memory. It only works for threads that are in ++ * user context, and *dst* must be a valid user space address. ++ * ++ * This helper should not be used to implement any kind of ++ * security mechanism because of TOC-TOU attacks, but rather to ++ * debug, divert, and manipulate execution of semi-cooperative ++ * processes. ++ * ++ * Keep in mind that this feature is meant for experiments, and it ++ * has a risk of crashing the system and running programs. ++ * Therefore, when an eBPF program using this helper is attached, ++ * a warning including PID and process name is printed to kernel ++ * logs. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) ++ * Description ++ * Check whether the probe is being run in the context of a given ++ * subset of the cgroup2 hierarchy. The cgroup2 to test is held by ++ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. ++ * Return ++ * The return value depends on the result of the test, and can be: ++ * ++ * * 0, if the current task does not belong to the cgroup2. ++ * * 1, if the current task belongs to the cgroup2. ++ * * A negative error code, if an error occurred.
++ * ++ * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) ++ * Description ++ * Resize (trim or grow) the packet associated to *skb* to the ++ * new *len*. The *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * The basic idea is that the helper performs the needed work to ++ * change the size of the packet, then the eBPF program rewrites ++ * the rest via helpers like **bpf_skb_store_bytes**\ (), ++ * **bpf_l3_csum_replace**\ (), **bpf_l4_csum_replace**\ () ++ * and others. This helper is a slow path utility intended for ++ * replies with control messages. And because it is targeted for ++ * slow path, the helper itself can afford to be slow: it ++ * implicitly linearizes, unclones and drops offloads from the ++ * *skb*. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_pull_data(struct sk_buff *skb, u32 len) ++ * Description ++ * Pull in non-linear data in case the *skb* is non-linear and not ++ * all of *len* are part of the linear section. Make *len* bytes ++ * from *skb* readable and writable. If a zero value is passed for ++ * *len*, then the whole length of the *skb* is pulled. ++ * ++ * This helper is only needed for reading and writing with direct ++ * packet access. ++ * ++ * For direct packet access, testing that offsets to access ++ * are within packet boundaries (test on *skb*\ **->data_end**) is ++ * susceptible to fail if offsets are invalid, or if the requested ++ * data is in non-linear parts of the *skb*. On failure the ++ * program can just bail out, or in the case of a non-linear ++ * buffer, use a helper to make the data available.
The ++ * **bpf_skb_load_bytes**\ () helper is a first solution to access ++ * the data. Another one consists in using **bpf_skb_pull_data** ++ * to pull in once the non-linear parts, then retesting and ++ * eventually access the data. ++ * ++ * At the same time, this also makes sure the *skb* is uncloned, ++ * which is a necessary condition for direct write. As this needs ++ * to be an invariant for the write part only, the verifier ++ * detects writes and adds a prologue that is calling ++ * **bpf_skb_pull_data()** to effectively unclone the *skb* from ++ * the very beginning in case it is indeed cloned. ++ * ++ * A call to this helper is susceptible to change the underlaying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum) ++ * Description ++ * Add the checksum *csum* into *skb*\ **->csum** in case the ++ * driver has supplied a checksum for the entire packet into that ++ * field. Return an error otherwise. This helper is intended to be ++ * used in combination with **bpf_csum_diff**\ (), in particular ++ * when the checksum needs to be updated after data has been ++ * written into the packet through direct packet access. ++ * Return ++ * The checksum on success, or a negative error code in case of ++ * failure. ++ * ++ * void bpf_set_hash_invalid(struct sk_buff *skb) ++ * Description ++ * Invalidate the current *skb*\ **->hash**. It can be used after ++ * mangling on headers through direct packet access, in order to ++ * indicate that the hash is outdated and to trigger a ++ * recalculation the next time the kernel tries to access this ++ * hash or when the **bpf_get_hash_recalc**\ () helper is called. 
++ * ++ * int bpf_get_numa_node_id(void) ++ * Description ++ * Return the id of the current NUMA node. The primary use case ++ * for this helper is the selection of sockets for the local NUMA ++ * node, when the program is attached to sockets using the ++ * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**), ++ * but the helper is also available to other eBPF program types, ++ * similarly to **bpf_get_smp_processor_id**\ (). ++ * Return ++ * The id of current NUMA node. ++ * ++ * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) ++ * Description ++ * Grows headroom of packet associated to *skb* and adjusts the ++ * offset of the MAC header accordingly, adding *len* bytes of ++ * space. It automatically extends and reallocates memory as ++ * required. ++ * ++ * This helper can be used on a layer 3 *skb* to push a MAC header ++ * for redirection into a layer 2 device. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * A call to this helper is susceptible to change the underlaying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) ++ * Description ++ * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that ++ * it is possible to use a negative value for *delta*. This helper ++ * can be used to prepare the packet for pushing or popping ++ * headers. ++ * ++ * A call to this helper is susceptible to change the underlaying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. 
++ * Return ++ * 0 on success, or a negative error in case of failure. + * + * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr) +- * Copy a NUL terminated string from unsafe address. In case the string +- * length is smaller than size, the target is not padded with further NUL +- * bytes. In case the string length is larger than size, just count-1 +- * bytes are copied and the last byte is set to NUL. +- * @dst: destination address +- * @size: maximum number of bytes to copy, including the trailing NUL +- * @unsafe_ptr: unsafe address +- * Return: +- * > 0 length of the string including the trailing NUL on success +- * < 0 error +- * +- * u64 bpf_get_socket_cookie(skb) +- * Get the cookie for the socket stored inside sk_buff. +- * @skb: pointer to skb +- * Return: 8 Bytes non-decreasing number on success or 0 if the socket +- * field is missing inside sk_buff +- * +- * u32 bpf_get_socket_uid(skb) +- * Get the owner uid of the socket stored inside sk_buff. +- * @skb: pointer to skb +- * Return: uid of the socket owner on success or overflowuid if failed. +- * +- * u32 bpf_set_hash(skb, hash) +- * Set full skb->hash. +- * @skb: pointer to skb +- * @hash: hash to set +- * +- * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen) +- * Calls setsockopt. Not all opts are available, only those with +- * integer optvals plus TCP_CONGESTION. +- * Supported levels: SOL_SOCKET and IPROTO_TCP +- * @bpf_socket: pointer to bpf_socket +- * @level: SOL_SOCKET or IPROTO_TCP +- * @optname: option name +- * @optval: pointer to option value +- * @optlen: length of optval in byes +- * Return: 0 or negative error +- * +- * int bpf_skb_adjust_room(skb, len_diff, mode, flags) +- * Grow or shrink room in sk_buff. 
+- * @skb: pointer to skb +- * @len_diff: (signed) amount of room to grow/shrink +- * @mode: operation mode (enum bpf_adj_room_mode) +- * @flags: reserved for future use +- * Return: 0 on success or negative error code +- * +- * int bpf_sk_redirect_map(map, key, flags) +- * Redirect skb to a sock in map using key as a lookup key for the +- * sock in map. +- * @map: pointer to sockmap +- * @key: key to lookup sock in map +- * @flags: reserved for future use +- * Return: SK_REDIRECT +- * +- * int bpf_sock_map_update(skops, map, key, flags) +- * @skops: pointer to bpf_sock_ops +- * @map: pointer to sockmap to update +- * @key: key to insert/update sock in map +- * @flags: same flags as map update elem ++ * Description ++ * Copy a NUL terminated string from an unsafe address ++ * *unsafe_ptr* to *dst*. The *size* should include the ++ * terminating NUL byte. In case the string length is smaller than ++ * *size*, the target is not padded with further NUL bytes. If the ++ * string length is larger than *size*, just *size*-1 bytes are ++ * copied and the last byte is set to NUL. ++ * ++ * On success, the length of the copied string is returned. This ++ * makes this helper useful in tracing programs for reading ++ * strings, and more importantly to get its length at runtime. See ++ * the following snippet: ++ * ++ * :: ++ * ++ * SEC("kprobe/sys_open") ++ * void bpf_sys_open(struct pt_regs *ctx) ++ * { ++ * char buf[PATHLEN]; // PATHLEN is defined to 256 ++ * int res = bpf_probe_read_str(buf, sizeof(buf), ++ * ctx->di); ++ * ++ * // Consume buf, for example push it to ++ * // userspace via bpf_perf_event_output(); we ++ * // can use res (the string length) as event ++ * // size, after checking its boundaries. ++ * } ++ * ++ * In comparison, using **bpf_probe_read()** helper here instead ++ * to read the string would require to estimate the length at ++ * compile time, and would often result in copying more memory ++ * than necessary. 
++ * ++ * Another useful use case is when parsing individual process ++ * arguments or individual environment variables navigating ++ * *current*\ **->mm->arg_start** and *current*\ ++ * **->mm->env_start**: using this helper and the return value, ++ * one can quickly iterate at the right offset of the memory area. ++ * Return ++ * On success, the strictly positive length of the string, ++ * including the trailing NUL character. On error, a negative ++ * value. ++ * ++ * u64 bpf_get_socket_cookie(struct sk_buff *skb) ++ * Description ++ * If the **struct sk_buff** pointed by *skb* has a known socket, ++ * retrieve the cookie (generated by the kernel) of this socket. ++ * If no cookie has been set yet, generate a new cookie. Once ++ * generated, the socket cookie remains stable for the life of the ++ * socket. This helper can be useful for monitoring per socket ++ * networking traffic statistics as it provides a unique socket ++ * identifier per namespace. ++ * Return ++ * A 8-byte long non-decreasing number on success, or 0 if the ++ * socket field is missing inside *skb*. ++ * ++ * u32 bpf_get_socket_uid(struct sk_buff *skb) ++ * Return ++ * The owner UID of the socket associated to *skb*. If the socket ++ * is **NULL**, or if it is not a full socket (i.e. if it is a ++ * time-wait or a request socket instead), **overflowuid** value ++ * is returned (note that **overflowuid** might also be the actual ++ * UID value for the socket). ++ * ++ * u32 bpf_set_hash(struct sk_buff *skb, u32 hash) ++ * Description ++ * Set the full hash for *skb* (set the field *skb*\ **->hash**) ++ * to value *hash*. ++ * Return ++ * 0 ++ * ++ * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) ++ * Description ++ * Emulate a call to **setsockopt()** on the socket associated to ++ * *bpf_socket*, which must be a full socket. 
The *level* at ++ * which the option resides and the name *optname* of the option ++ * must be specified, see **setsockopt(2)** for more information. ++ * The option value of length *optlen* is pointed by *optval*. ++ * ++ * This helper actually implements a subset of **setsockopt()**. ++ * It supports the following *level*\ s: ++ * ++ * * **SOL_SOCKET**, which supports the following *optname*\ s: ++ * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, ++ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. ++ * * **IPPROTO_TCP**, which supports the following *optname*\ s: ++ * **TCP_CONGESTION**, **TCP_BPF_IW**, ++ * **TCP_BPF_SNDCWND_CLAMP**. ++ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. ++ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags) ++ * Description ++ * Grow or shrink the room for data in the packet associated to ++ * *skb* by *len_diff*, and according to the selected *mode*. ++ * ++ * There is a single supported mode at this time: ++ * ++ * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer ++ * (room space is added or removed below the layer 3 header). ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * A call to this helper is susceptible to change the underlaying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) ++ * Description ++ * Redirect the packet to the endpoint referenced by *map* at ++ * index *key*. 
Depending on its type, this *map* can contain ++ * references to net devices (for forwarding packets through other ++ * ports), or to CPUs (for redirecting XDP frames to another CPU; ++ * but this is only implemented for native XDP (with driver ++ * support) as of this writing). ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * When used to redirect packets to net devices, this helper ++ * provides a high performance increase over **bpf_redirect**\ (). ++ * This is due to various implementation details of the underlying ++ * mechanisms, one of which is the fact that **bpf_redirect_map**\ ++ * () tries to send packet as a "bulk" to the device. ++ * Return ++ * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. ++ * ++ * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags) ++ * Description ++ * Redirect the packet to the socket referenced by *map* (of type ++ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and ++ * egress interfaces can be used for redirection. The ++ * **BPF_F_INGRESS** value in *flags* is used to make the ++ * distinction (ingress path is selected if the flag is present, ++ * egress path otherwise). This is the only flag supported for now. ++ * Return ++ * **SK_PASS** on success, or **SK_DROP** on error. ++ * ++ * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) ++ * Description ++ * Add an entry to, or update a *map* referencing sockets. The ++ * *skops* is used as a new value for the entry associated to ++ * *key*. *flags* is one of: ++ * ++ * **BPF_NOEXIST** ++ * The entry for *key* must not exist in the map. ++ * **BPF_EXIST** ++ * The entry for *key* must already exist in the map. ++ * **BPF_ANY** ++ * No condition on the existence of the entry for *key*. ++ * ++ * If the *map* has eBPF programs (parser and verdict), those will ++ * be inherited by the socket being added. 
If the socket is ++ * already attached to eBPF programs, this results in an error. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) ++ * Description ++ * Adjust the address pointed by *xdp_md*\ **->data_meta** by ++ * *delta* (which can be positive or negative). Note that this ++ * operation modifies the address stored in *xdp_md*\ **->data**, ++ * so the latter must be loaded only after the helper has been ++ * called. ++ * ++ * The use of *xdp_md*\ **->data_meta** is optional and programs ++ * are not required to use it. The rationale is that when the ++ * packet is processed with XDP (e.g. as DoS filter), it is ++ * possible to push further meta data along with it before passing ++ * to the stack, and to give the guarantee that an ingress eBPF ++ * program attached as a TC classifier on the same device can pick ++ * this up for further post-processing. Since TC works with socket ++ * buffers, it remains possible to set from XDP the **mark** or ++ * **priority** pointers, or other pointers for the socket buffer. ++ * Having this scratch space generic and programmable allows for ++ * more flexibility as the user is free to store whatever meta ++ * data they need. ++ * ++ * A call to this helper is susceptible to change the underlaying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) ++ * Description ++ * Read the value of a perf event counter, and store it into *buf* ++ * of size *buf_size*. This helper relies on a *map* of type ++ * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. 
The nature of the perf event ++ * counter is selected when *map* is updated with perf event file ++ * descriptors. The *map* is an array whose size is the number of ++ * available CPUs, and each cell contains a value relative to one ++ * CPU. The value to retrieve is indicated by *flags*, that ++ * contains the index of the CPU to look up, masked with ++ * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to ++ * **BPF_F_CURRENT_CPU** to indicate that the value for the ++ * current CPU should be retrieved. ++ * ++ * This helper behaves in a way close to ++ * **bpf_perf_event_read**\ () helper, save that instead of ++ * just returning the value observed, it fills the *buf* ++ * structure. This allows for additional data to be retrieved: in ++ * particular, the enabled and running times (in *buf*\ ++ * **->enabled** and *buf*\ **->running**, respectively) are ++ * copied. In general, **bpf_perf_event_read_value**\ () is ++ * recommended over **bpf_perf_event_read**\ (), which has some ++ * ABI issues and provides fewer functionalities. ++ * ++ * These values are interesting, because hardware PMU (Performance ++ * Monitoring Unit) counters are limited resources. When there are ++ * more PMU based perf events opened than available counters, ++ * kernel will multiplex these events so each event gets certain ++ * percentage (but not all) of the PMU time. In case that ++ * multiplexing happens, the number of samples or counter value ++ * will not reflect the case compared to when no multiplexing ++ * occurs. This makes comparison between different runs difficult. ++ * Typically, the counter value should be normalized before ++ * comparing to other experiments. The usual normalization is done ++ * as follows. ++ * ++ * :: ++ * ++ * normalized_counter = counter * t_enabled / t_running ++ * ++ * Where t_enabled is the time enabled for event and t_running is ++ * the time running for event since last normalization. 
The ++ * enabled and running times are accumulated since the perf event ++ * open. To achieve scaling factor between two invocations of an ++ * eBPF program, users can use CPU id as the key (which is ++ * typical for perf array usage model) to remember the previous ++ * value and do the calculation inside the eBPF program. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) ++ * Description ++ * For an eBPF program attached to a perf event, retrieve the ++ * value of the event counter associated to *ctx* and store it in ++ * the structure pointed by *buf* and of size *buf_size*. Enabled ++ * and running times are also stored in the structure (see ++ * description of helper **bpf_perf_event_read_value**\ () for ++ * more details). ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) ++ * Description ++ * Emulate a call to **getsockopt()** on the socket associated to ++ * *bpf_socket*, which must be a full socket. The *level* at ++ * which the option resides and the name *optname* of the option ++ * must be specified, see **getsockopt(2)** for more information. ++ * The retrieved value is stored in the structure pointed by ++ * *optval* and of length *optlen*. ++ * ++ * This helper actually implements a subset of **getsockopt()**. ++ * It supports the following *level*\ s: ++ * ++ * * **IPPROTO_TCP**, which supports *optname* ++ * **TCP_CONGESTION**. ++ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. ++ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. ++ * Return ++ * 0 on success, or a negative error in case of failure.
++ * ++ * int bpf_override_return(struct pt_regs *regs, u64 rc) ++ * Description ++ * Used for error injection, this helper uses kprobes to override ++ * the return value of the probed function, and to set it to *rc*. ++ * The first argument is the context *regs* on which the kprobe ++ * works. ++ * ++ * This helper works by setting the PC (program counter) ++ * to an override function which is run in place of the original ++ * probed function. This means the probed function is not run at ++ * all. The replacement function just returns with the required ++ * value. ++ * ++ * This helper has security implications, and thus is subject to ++ * restrictions. It is only available if the kernel was compiled ++ * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration ++ * option, and in this case it only works on functions tagged with ++ * **ALLOW_ERROR_INJECTION** in the kernel code. ++ * ++ * Also, the helper is only available for the architectures having ++ * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, ++ * x86 architecture is the only one to support this feature. ++ * Return ++ * 0 ++ * ++ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) ++ * Description ++ * Attempt to set the value of the **bpf_sock_ops_cb_flags** field ++ * for the full TCP socket associated to *bpf_sock* to ++ * *argval*. ++ * ++ * The primary use of this field is to determine if there should ++ * be calls to eBPF programs of type ++ * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP ++ * code. A program of the same type can change its value, per ++ * connection and as necessary, when the connection is ++ * established. This field is directly accessible for reading, but ++ * this helper must be used for updates in order to return an ++ * error if an eBPF program tries to set a callback that is not ++ * supported in the current kernel.
++ * ++ * The supported callback values that *argval* can combine are: ++ * ++ * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) ++ * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) ++ * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) ++ * ++ * Here are some examples of where one could call such eBPF ++ * program: ++ * ++ * * When RTO fires. ++ * * When a packet is retransmitted. ++ * * When the connection terminates. ++ * * When a packet is sent. ++ * * When a packet is received. ++ * Return ++ * Code **-EINVAL** if the socket is not a full TCP socket; ++ * otherwise, a positive number containing the bits that could not ++ * be set is returned (which comes down to 0 if all bits were set ++ * as required). ++ * ++ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) ++ * Description ++ * This helper is used in programs implementing policies at the ++ * socket level. If the message *msg* is allowed to pass (i.e. if ++ * the verdict eBPF program returns **SK_PASS**), redirect it to ++ * the socket referenced by *map* (of type ++ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and ++ * egress interfaces can be used for redirection. The ++ * **BPF_F_INGRESS** value in *flags* is used to make the ++ * distinction (ingress path is selected if the flag is present, ++ * egress path otherwise). This is the only flag supported for now. ++ * Return ++ * **SK_PASS** on success, or **SK_DROP** on error. ++ * ++ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) ++ * Description ++ * For socket policies, apply the verdict of the eBPF program to ++ * the next *bytes* (number of bytes) of message *msg*. ++ * ++ * For example, this helper can be used in the following cases: ++ * ++ * * A single **sendmsg**\ () or **sendfile**\ () system call ++ * contains multiple logical messages that the eBPF program is ++ * supposed to read and for which it should apply a verdict. 
++ * * An eBPF program only cares to read the first *bytes* of a ++ * *msg*. If the message has a large payload, then setting up ++ * and calling the eBPF program repeatedly for all bytes, even ++ * though the verdict is already known, would create unnecessary ++ * overhead. ++ * ++ * When called from within an eBPF program, the helper sets a ++ * counter internal to the BPF infrastructure, that is used to ++ * apply the last verdict to the next *bytes*. If *bytes* is ++ * smaller than the current data being processed from a ++ * **sendmsg**\ () or **sendfile**\ () system call, the first ++ * *bytes* will be sent and the eBPF program will be re-run with ++ * the pointer for start of data pointing to byte number *bytes* ++ * **+ 1**. If *bytes* is larger than the current data being ++ * processed, then the eBPF verdict will be applied to multiple ++ * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are ++ * consumed. ++ * ++ * Note that if a socket closes with the internal counter holding ++ * a non-zero value, this is not a problem because data is not ++ * being buffered for *bytes* and is sent as it is received. ++ * Return ++ * 0 ++ * ++ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) ++ * Description ++ * For socket policies, prevent the execution of the verdict eBPF ++ * program for message *msg* until *bytes* (byte number) have been ++ * accumulated. ++ * ++ * This can be used when one needs a specific number of bytes ++ * before a verdict can be assigned, even if the data spans ++ * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme ++ * case would be a user calling **sendmsg**\ () repeatedly with ++ * 1-byte long message segments. Obviously, this is bad for ++ * performance, but it is still valid. If the eBPF program needs ++ * *bytes* bytes to validate a header, this helper can be used to ++ * prevent the eBPF program to be called again until *bytes* have ++ * been accumulated. 
++ * Return ++ * 0 ++ * ++ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) ++ * Description ++ * For socket policies, pull in non-linear data from user space ++ * for *msg* and set pointers *msg*\ **->data** and *msg*\ ++ * **->data_end** to *start* and *end* bytes offsets into *msg*, ++ * respectively. ++ * ++ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a ++ * *msg* it can only parse data that the (**data**, **data_end**) ++ * pointers have already consumed. For **sendmsg**\ () hooks this ++ * is likely the first scatterlist element. But for calls relying ++ * on the **sendpage** handler (e.g. **sendfile**\ ()) this will ++ * be the range (**0**, **0**) because the data is shared with ++ * user space and by default the objective is to avoid allowing ++ * user space to modify data while (or after) eBPF verdict is ++ * being decided. This helper can be used to pull in data and to ++ * set the start and end pointer to given values. Data will be ++ * copied if necessary (i.e. if data was not linear and if start ++ * and end pointers do not point to the same chunk). ++ * ++ * A call to this helper is susceptible to change the underlaying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) ++ * Description ++ * Bind the socket associated to *ctx* to the address pointed by ++ * *addr*, of length *addr_len*. 
This allows for making outgoing ++ * connection from the desired IP address, which can be useful for ++ * example when all processes inside a cgroup should use one ++ * single IP address on a host that has multiple IP configured. ++ * ++ * This helper works for IPv4 and IPv6, TCP and UDP sockets. The ++ * domain (*addr*\ **->sa_family**) must be **AF_INET** (or ++ * **AF_INET6**). Looking for a free port to bind to can be ++ * expensive, therefore binding to port is not permitted by the ++ * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively) ++ * must be set to zero. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) ++ * Description ++ * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is ++ * only possible to shrink the packet as of this writing, ++ * therefore *delta* must be a negative integer. ++ * ++ * A call to this helper is susceptible to change the underlaying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) ++ * Description ++ * Retrieve the XFRM state (IP transform framework, see also ++ * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. ++ * ++ * The retrieved value is stored in the **struct bpf_xfrm_state** ++ * pointed by *xfrm_state* and of length *size*. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_XFRM** configuration option. ++ * Return ++ * 0 on success, or a negative error in case of failure. 
++ * ++ * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags) ++ * Description ++ * Return a user or a kernel stack in bpf program provided buffer. ++ * To achieve this, the helper needs *ctx*, which is a pointer ++ * to the context on which the tracing program is executed. ++ * To store the stacktrace, the bpf program provides *buf* with ++ * a nonnegative *size*. ++ * ++ * The last argument, *flags*, holds the number of stack frames to ++ * skip (from 0 to 255), masked with ++ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set ++ * the following flags: ++ * ++ * **BPF_F_USER_STACK** ++ * Collect a user space stack instead of a kernel stack. ++ * **BPF_F_USER_BUILD_ID** ++ * Collect buildid+offset instead of ips for user stack, ++ * only valid if **BPF_F_USER_STACK** is also specified. ++ * ++ * **bpf_get_stack**\ () can collect up to ++ * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject ++ * to sufficient large buffer size. Note that ++ * this limit can be controlled with the **sysctl** program, and ++ * that it should be manually increased in order to profile long ++ * user stacks (such as stacks for Java programs). To do so, use: ++ * ++ * :: ++ * ++ * # sysctl kernel.perf_event_max_stack=<new value> ++ * Return ++ * A non-negative value equal to or less than *size* on success, ++ * or a negative error in case of failure. ++ * ++ * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header) ++ * Description ++ * This helper is similar to **bpf_skb_load_bytes**\ () in that ++ * it provides an easy way to load *len* bytes from *offset* ++ * from the packet associated to *skb*, into the buffer pointed ++ * by *to*. The difference to **bpf_skb_load_bytes**\ () is that ++ * a fifth argument *start_header* exists in order to select a ++ * base offset to start from. *start_header* can be one of: ++ * ++ * **BPF_HDR_START_MAC** ++ * Base offset to load data from is *skb*'s mac header. 
++ * **BPF_HDR_START_NET** ++ * Base offset to load data from is *skb*'s network header. ++ * ++ * In general, "direct packet access" is the preferred method to ++ * access packet data, however, this helper is in particular useful ++ * in socket filters where *skb*\ **->data** does not always point ++ * to the start of the mac header and where "direct packet access" ++ * is not available. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) ++ * Description ++ * Do FIB lookup in kernel tables using parameters in *params*. ++ * If lookup is successful and result shows packet is to be ++ * forwarded, the neighbor tables are searched for the nexthop. ++ * If successful (ie., FIB lookup shows forwarding and nexthop ++ * is resolved), the nexthop address is returned in ipv4_dst ++ * or ipv6_dst based on family, smac is set to mac address of ++ * egress device, dmac is set to nexthop mac address, rt_metric ++ * is set to metric from route (IPv4/IPv6 only), and ifindex ++ * is set to the device index of the nexthop from the FIB lookup. ++ * ++ * *plen* argument is the size of the passed in struct. ++ * *flags* argument can be a combination of one or more of the ++ * following values: ++ * ++ * **BPF_FIB_LOOKUP_DIRECT** ++ * Do a direct table lookup vs full lookup using FIB ++ * rules. ++ * **BPF_FIB_LOOKUP_OUTPUT** ++ * Perform lookup from an egress perspective (default is ++ * ingress). ++ * ++ * *ctx* is either **struct xdp_md** for XDP programs or ++ * **struct sk_buff** tc cls_act programs. 
++ * Return ++ * * < 0 if any input argument is invalid ++ * * 0 on success (packet is forwarded, nexthop neighbor exists) ++ * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the ++ * packet is not forwarded or needs assist from full stack ++ * ++ * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) ++ * Description ++ * Add an entry to, or update a sockhash *map* referencing sockets. ++ * The *skops* is used as a new value for the entry associated to ++ * *key*. *flags* is one of: ++ * ++ * **BPF_NOEXIST** ++ * The entry for *key* must not exist in the map. ++ * **BPF_EXIST** ++ * The entry for *key* must already exist in the map. ++ * **BPF_ANY** ++ * No condition on the existence of the entry for *key*. ++ * ++ * If the *map* has eBPF programs (parser and verdict), those will ++ * be inherited by the socket being added. If the socket is ++ * already attached to eBPF programs, this results in an error. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) ++ * Description ++ * This helper is used in programs implementing policies at the ++ * socket level. If the message *msg* is allowed to pass (i.e. if ++ * the verdict eBPF program returns **SK_PASS**), redirect it to ++ * the socket referenced by *map* (of type ++ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and ++ * egress interfaces can be used for redirection. The ++ * **BPF_F_INGRESS** value in *flags* is used to make the ++ * distinction (ingress path is selected if the flag is present, ++ * egress path otherwise). This is the only flag supported for now. ++ * Return ++ * **SK_PASS** on success, or **SK_DROP** on error. 
++ * ++ * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) ++ * Description ++ * This helper is used in programs implementing policies at the ++ * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. ++ * if the verdict eBPF program returns **SK_PASS**), redirect it ++ * to the socket referenced by *map* (of type ++ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and ++ * egress interfaces can be used for redirection. The ++ * **BPF_F_INGRESS** value in *flags* is used to make the ++ * distinction (ingress path is selected if the flag is present, ++ * egress otherwise). This is the only flag supported for now. ++ * Return ++ * **SK_PASS** on success, or **SK_DROP** on error. ++ * ++ * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) ++ * Description ++ * Encapsulate the packet associated to *skb* within a Layer 3 ++ * protocol header. This header is provided in the buffer at ++ * address *hdr*, with *len* its size in bytes. *type* indicates ++ * the protocol of the header and can be one of: ++ * ++ * **BPF_LWT_ENCAP_SEG6** ++ * IPv6 encapsulation with Segment Routing Header ++ * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH, ++ * the IPv6 header is computed by the kernel. ++ * **BPF_LWT_ENCAP_SEG6_INLINE** ++ * Only works if *skb* contains an IPv6 packet. Insert a ++ * Segment Routing Header (**struct ipv6_sr_hdr**) inside ++ * the IPv6 header. ++ * ++ * A call to this helper is susceptible to change the underlaying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. 
++ * ++ * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) ++ * Description ++ * Store *len* bytes from address *from* into the packet ++ * associated to *skb*, at *offset*. Only the flags, tag and TLVs ++ * inside the outermost IPv6 Segment Routing Header can be ++ * modified through this helper. ++ * ++ * A call to this helper is susceptible to change the underlaying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) ++ * Description ++ * Adjust the size allocated to TLVs in the outermost IPv6 ++ * Segment Routing Header contained in the packet associated to ++ * *skb*, at position *offset* by *delta* bytes. Only offsets ++ * after the segments are accepted. *delta* can be as well ++ * positive (growing) as negative (shrinking). ++ * ++ * A call to this helper is susceptible to change the underlaying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) ++ * Description ++ * Apply an IPv6 Segment Routing action of type *action* to the ++ * packet associated to *skb*. Each action takes a parameter ++ * contained at address *param*, and of length *param_len* bytes. ++ * *action* can be one of: ++ * ++ * **SEG6_LOCAL_ACTION_END_X** ++ * End.X action: Endpoint with Layer-3 cross-connect. ++ * Type of *param*: **struct in6_addr**. 
++ * **SEG6_LOCAL_ACTION_END_T** ++ * End.T action: Endpoint with specific IPv6 table lookup. ++ * Type of *param*: **int**. ++ * **SEG6_LOCAL_ACTION_END_B6** ++ * End.B6 action: Endpoint bound to an SRv6 policy. ++ * Type of *param*: **struct ipv6_sr_hdr**. ++ * **SEG6_LOCAL_ACTION_END_B6_ENCAP** ++ * End.B6.Encap action: Endpoint bound to an SRv6 ++ * encapsulation policy. ++ * Type of *param*: **struct ipv6_sr_hdr**. ++ * ++ * A call to this helper is susceptible to change the underlaying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) ++ * Description ++ * This helper is used in programs implementing IR decoding, to ++ * report a successfully decoded key press with *scancode*, ++ * *toggle* value in the given *protocol*. The scancode will be ++ * translated to a keycode using the rc keymap, and reported as ++ * an input key down event. After a period a key up event is ++ * generated. This period can be extended by calling either ++ * **bpf_rc_keydown**\ () again with the same values, or calling ++ * **bpf_rc_repeat**\ (). ++ * ++ * Some protocols include a toggle bit, in case the button was ++ * released and pressed again between consecutive scancodes. ++ * ++ * The *ctx* should point to the lirc sample as passed into ++ * the program. ++ * ++ * The *protocol* is the decoded protocol number (see ++ * **enum rc_proto** for some predefined values). ++ * ++ * This helper is only available if the kernel was compiled with ++ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to ++ * "**y**". 
++ * Return ++ * 0 ++ * ++ * int bpf_rc_repeat(void *ctx) ++ * Description ++ * This helper is used in programs implementing IR decoding, to ++ * report a successfully decoded repeat key message. This delays ++ * the generation of a key up event for previously generated ++ * key down event. ++ * ++ * Some IR protocols like NEC have a special IR message for ++ * repeating last button, for when a button is held down. ++ * ++ * The *ctx* should point to the lirc sample as passed into ++ * the program. ++ * ++ * This helper is only available if the kernel was compiled with ++ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to ++ * "**y**". ++ * Return ++ * 0 ++ * ++ * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb) ++ * Description ++ * Return the cgroup v2 id of the socket associated with the *skb*. ++ * This is roughly similar to the **bpf_get_cgroup_classid**\ () ++ * helper for cgroup v1 by providing a tag resp. identifier that ++ * can be matched on or used for map lookups e.g. to implement ++ * policy. The cgroup v2 id of a given path in the hierarchy is ++ * exposed in user space through the f_handle API in order to get ++ * to the same 64-bit id. ++ * ++ * This helper can be used on TC egress path, but not on ingress, ++ * and is available only if the kernel was compiled with the ++ * **CONFIG_SOCK_CGROUP_DATA** configuration option. ++ * Return ++ * The id is returned or 0 in case the id could not be retrieved. ++ * ++ * u64 bpf_get_current_cgroup_id(void) ++ * Return ++ * A 64-bit integer containing the current cgroup id based ++ * on the cgroup within which the current task is running. 
+ */ + #define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ +@@ -638,6 +2131,33 @@ union bpf_attr { + FN(redirect_map), \ + FN(sk_redirect_map), \ + FN(sock_map_update), \ ++ FN(xdp_adjust_meta), \ ++ FN(perf_event_read_value), \ ++ FN(perf_prog_read_value), \ ++ FN(getsockopt), \ ++ FN(override_return), \ ++ FN(sock_ops_cb_flags_set), \ ++ FN(msg_redirect_map), \ ++ FN(msg_apply_bytes), \ ++ FN(msg_cork_bytes), \ ++ FN(msg_pull_data), \ ++ FN(bind), \ ++ FN(xdp_adjust_tail), \ ++ FN(skb_get_xfrm_state), \ ++ FN(get_stack), \ ++ FN(skb_load_bytes_relative), \ ++ FN(fib_lookup), \ ++ FN(sock_hash_update), \ ++ FN(msg_redirect_hash), \ ++ FN(sk_redirect_hash), \ ++ FN(lwt_push_encap), \ ++ FN(lwt_seg6_store_bytes), \ ++ FN(lwt_seg6_adjust_srh), \ ++ FN(lwt_seg6_action), \ ++ FN(rc_repeat), \ ++ FN(rc_keydown), \ ++ FN(skb_cgroup_id), \ ++ FN(get_current_cgroup_id), + + /* integer value in 'imm' field of BPF_CALL instruction selects which helper + * function eBPF program intends to call +@@ -671,17 +2191,23 @@ enum bpf_func_id { + /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ + #define BPF_F_TUNINFO_IPV6 (1ULL << 0) + +-/* BPF_FUNC_get_stackid flags. */ ++/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ + #define BPF_F_SKIP_FIELD_MASK 0xffULL + #define BPF_F_USER_STACK (1ULL << 8) ++/* flags used by BPF_FUNC_get_stackid only. */ + #define BPF_F_FAST_STACK_CMP (1ULL << 9) + #define BPF_F_REUSE_STACKID (1ULL << 10) ++/* flags used by BPF_FUNC_get_stack only. */ ++#define BPF_F_USER_BUILD_ID (1ULL << 11) + + /* BPF_FUNC_skb_set_tunnel_key flags. */ + #define BPF_F_ZERO_CSUM_TX (1ULL << 1) + #define BPF_F_DONT_FRAGMENT (1ULL << 2) ++#define BPF_F_SEQ_NUMBER (1ULL << 3) + +-/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */ ++/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and ++ * BPF_FUNC_perf_event_read_value flags. 
++ */ + #define BPF_F_INDEX_MASK 0xffffffffULL + #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK + /* BPF_FUNC_perf_event_output for sk_buff input context. */ +@@ -692,6 +2218,18 @@ enum bpf_adj_room_mode { + BPF_ADJ_ROOM_NET, + }; + ++/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ ++enum bpf_hdr_start_off { ++ BPF_HDR_START_MAC, ++ BPF_HDR_START_NET, ++}; ++ ++/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */ ++enum bpf_lwt_encap_mode { ++ BPF_LWT_ENCAP_SEG6, ++ BPF_LWT_ENCAP_SEG6_INLINE ++}; ++ + /* user accessible mirror of in-kernel sk_buff. + * new fields can only be added to the end of this structure + */ +@@ -715,7 +2253,7 @@ struct __sk_buff { + __u32 data_end; + __u32 napi_id; + +- /* accessed by BPF_PROG_TYPE_sk_skb types */ ++ /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */ + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ +@@ -723,6 +2261,9 @@ struct __sk_buff { + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ ++ /* ... here. */ ++ ++ __u32 data_meta; + }; + + struct bpf_tunnel_key { +@@ -733,10 +2274,24 @@ struct bpf_tunnel_key { + }; + __u8 tunnel_tos; + __u8 tunnel_ttl; +- __u16 tunnel_ext; ++ __u16 tunnel_ext; /* Padding, future use. */ + __u32 tunnel_label; + }; + ++/* user accessible mirror of in-kernel xfrm_state. ++ * new fields can only be added to the end of this structure ++ */ ++struct bpf_xfrm_state { ++ __u32 reqid; ++ __u32 spi; /* Stored in network byte order */ ++ __u16 family; ++ __u16 ext; /* Padding, future use. */ ++ union { ++ __u32 remote_ipv4; /* Stored in network byte order */ ++ __u32 remote_ipv6[4]; /* Stored in network byte order */ ++ }; ++}; ++ + /* Generic BPF return codes which all BPF program types may support. 
+ * The values are binary compatible with their TC_ACT_* counter-part to + * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT +@@ -760,6 +2315,15 @@ struct bpf_sock { + __u32 protocol; + __u32 mark; + __u32 priority; ++ __u32 src_ip4; /* Allows 1,2,4-byte read. ++ * Stored in network byte order. ++ */ ++ __u32 src_ip6[4]; /* Allows 1,2,4-byte read. ++ * Stored in network byte order. ++ */ ++ __u32 src_port; /* Allows 4-byte read. ++ * Stored in host byte order ++ */ + }; + + #define XDP_PACKET_HEADROOM 256 +@@ -783,12 +2347,31 @@ enum xdp_action { + struct xdp_md { + __u32 data; + __u32 data_end; ++ __u32 data_meta; ++ /* Below access go through struct xdp_rxq_info */ ++ __u32 ingress_ifindex; /* rxq->dev->ifindex */ ++ __u32 rx_queue_index; /* rxq->queue_index */ + }; + + enum sk_action { +- SK_ABORTED = 0, +- SK_DROP, +- SK_REDIRECT, ++ SK_DROP = 0, ++ SK_PASS, ++}; ++ ++/* user accessible metadata for SK_MSG packet hook, new fields must ++ * be added to the end of this structure ++ */ ++struct sk_msg_md { ++ void *data; ++ void *data_end; ++ ++ __u32 family; ++ __u32 remote_ip4; /* Stored in network byte order */ ++ __u32 local_ip4; /* Stored in network byte order */ ++ __u32 remote_ip6[4]; /* Stored in network byte order */ ++ __u32 local_ip6[4]; /* Stored in network byte order */ ++ __u32 remote_port; /* Stored in network byte order */ ++ __u32 local_port; /* stored in host byte order */ + }; + + #define BPF_TAG_SIZE 8 +@@ -801,6 +2384,19 @@ struct bpf_prog_info { + __u32 xlated_prog_len; + __aligned_u64 jited_prog_insns; + __aligned_u64 xlated_prog_insns; ++ __u64 load_time; /* ns since boottime */ ++ __u32 created_by_uid; ++ __u32 nr_map_ids; ++ __aligned_u64 map_ids; ++ char name[BPF_OBJ_NAME_LEN]; ++ __u32 ifindex; ++ __u32 gpl_compatible:1; ++ __u64 netns_dev; ++ __u64 netns_ino; ++ __u32 nr_jited_ksyms; ++ __u32 nr_jited_func_lens; ++ __aligned_u64 jited_ksyms; ++ __aligned_u64 jited_func_lens; + } __attribute__((aligned(8))); + + 
struct bpf_map_info { +@@ -810,8 +2406,48 @@ struct bpf_map_info { + __u32 value_size; + __u32 max_entries; + __u32 map_flags; ++ char name[BPF_OBJ_NAME_LEN]; ++ __u32 ifindex; ++ __u32 :32; ++ __u64 netns_dev; ++ __u64 netns_ino; ++ __u32 btf_id; ++ __u32 btf_key_type_id; ++ __u32 btf_value_type_id; ++} __attribute__((aligned(8))); ++ ++struct bpf_btf_info { ++ __aligned_u64 btf; ++ __u32 btf_size; ++ __u32 id; + } __attribute__((aligned(8))); + ++/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed ++ * by user and intended to be used by socket (e.g. to bind to, depends on ++ * attach type). ++ */ ++struct bpf_sock_addr { ++ __u32 user_family; /* Allows 4-byte read, but no write. */ ++ __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write. ++ * Stored in network byte order. ++ */ ++ __u32 user_ip6[4]; /* Allows 1,2,4-byte read and 4-byte write. ++ * Stored in network byte order. ++ */ ++ __u32 user_port; /* Allows 4-byte read and write. ++ * Stored in network byte order ++ */ ++ __u32 family; /* Allows 4-byte read, but no write */ ++ __u32 type; /* Allows 4-byte read, but no write */ ++ __u32 protocol; /* Allows 4-byte read, but no write */ ++ __u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write. ++ * Stored in network byte order. ++ */ ++ __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read and 4-byte write. ++ * Stored in network byte order. ++ */ ++}; ++ + /* User bpf_sock_ops struct to access socket values and specify request ops + * and their replies. 
+ * Some of this fields are in network (bigendian) byte order and may need +@@ -821,8 +2457,9 @@ struct bpf_map_info { + struct bpf_sock_ops { + __u32 op; + union { +- __u32 reply; +- __u32 replylong[4]; ++ __u32 args[4]; /* Optionally passed to bpf program */ ++ __u32 reply; /* Returned by bpf program */ ++ __u32 replylong[4]; /* Optionally returned by bpf prog */ + }; + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ +@@ -831,8 +2468,45 @@ struct bpf_sock_ops { + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ ++ __u32 is_fullsock; /* Some TCP fields are only valid if ++ * there is a full socket. If not, the ++ * fields read as zero. ++ */ ++ __u32 snd_cwnd; ++ __u32 srtt_us; /* Averaged RTT << 3 in usecs */ ++ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ ++ __u32 state; ++ __u32 rtt_min; ++ __u32 snd_ssthresh; ++ __u32 rcv_nxt; ++ __u32 snd_nxt; ++ __u32 snd_una; ++ __u32 mss_cache; ++ __u32 ecn_flags; ++ __u32 rate_delivered; ++ __u32 rate_interval_us; ++ __u32 packets_out; ++ __u32 retrans_out; ++ __u32 total_retrans; ++ __u32 segs_in; ++ __u32 data_segs_in; ++ __u32 segs_out; ++ __u32 data_segs_out; ++ __u32 lost_out; ++ __u32 sacked_out; ++ __u32 sk_txhash; ++ __u64 bytes_received; ++ __u64 bytes_acked; + }; + ++/* Definitions for bpf_sock_ops_cb_flags */ ++#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) ++#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) ++#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) ++#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently ++ * supported cb flags ++ */ ++ + /* List of known BPF sock_ops operators. + * New entries can only be added at the end + */ +@@ -859,9 +2533,156 @@ enum { + BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control + * needs ECN + */ ++ BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. 
The correct value is ++ * based on the path and may be ++ * dependent on the congestion control ++ * algorithm. In general it indicates ++ * a congestion threshold. RTTs above ++ * this indicate congestion ++ */ ++ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered. ++ * Arg1: value of icsk_retransmits ++ * Arg2: value of icsk_rto ++ * Arg3: whether RTO has expired ++ */ ++ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted. ++ * Arg1: sequence number of 1st byte ++ * Arg2: # segments ++ * Arg3: return value of ++ * tcp_transmit_skb (0 => success) ++ */ ++ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state. ++ * Arg1: old_state ++ * Arg2: new_state ++ */ ++ BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after ++ * socket transition to LISTEN state. ++ */ ++}; ++ ++/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect ++ * changes between the TCP and BPF versions. Ideally this should never happen. ++ * If it does, we need to add code to convert them before calling ++ * the BPF sock_ops function. ++ */ ++enum { ++ BPF_TCP_ESTABLISHED = 1, ++ BPF_TCP_SYN_SENT, ++ BPF_TCP_SYN_RECV, ++ BPF_TCP_FIN_WAIT1, ++ BPF_TCP_FIN_WAIT2, ++ BPF_TCP_TIME_WAIT, ++ BPF_TCP_CLOSE, ++ BPF_TCP_CLOSE_WAIT, ++ BPF_TCP_LAST_ACK, ++ BPF_TCP_LISTEN, ++ BPF_TCP_CLOSING, /* Now a valid state */ ++ BPF_TCP_NEW_SYN_RECV, ++ ++ BPF_TCP_MAX_STATES /* Leave at the end! 
*/ + }; + + #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ + #define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ + ++struct bpf_perf_event_value { ++ __u64 counter; ++ __u64 enabled; ++ __u64 running; ++}; ++ ++#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) ++#define BPF_DEVCG_ACC_READ (1ULL << 1) ++#define BPF_DEVCG_ACC_WRITE (1ULL << 2) ++ ++#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) ++#define BPF_DEVCG_DEV_CHAR (1ULL << 1) ++ ++struct bpf_cgroup_dev_ctx { ++ /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ ++ __u32 access_type; ++ __u32 major; ++ __u32 minor; ++}; ++ ++struct bpf_raw_tracepoint_args { ++ __u64 args[0]; ++}; ++ ++/* DIRECT: Skip the FIB rules and go to FIB table associated with device ++ * OUTPUT: Do lookup from egress perspective; default is ingress ++ */ ++#define BPF_FIB_LOOKUP_DIRECT BIT(0) ++#define BPF_FIB_LOOKUP_OUTPUT BIT(1) ++ ++enum { ++ BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ ++ BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */ ++ BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */ ++ BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */ ++ BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */ ++ BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */ ++ BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ ++ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ ++ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ ++}; ++ ++struct bpf_fib_lookup { ++ /* input: network family for lookup (AF_INET, AF_INET6) ++ * output: network family of egress nexthop ++ */ ++ __u8 family; ++ ++ /* set if lookup is to consider L4 data - e.g., FIB rules */ ++ __u8 l4_protocol; ++ __be16 sport; ++ __be16 dport; ++ ++ /* total length of packet from network header - used for MTU check */ ++ __u16 tot_len; ++ ++ /* input: L3 device index for lookup ++ * output: device index from FIB lookup ++ */ ++ __u32 ifindex; ++ 
++ union { ++ /* inputs to lookup */ ++ __u8 tos; /* AF_INET */ ++ __be32 flowinfo; /* AF_INET6, flow_label + priority */ ++ ++ /* output: metric of fib result (IPv4/IPv6 only) */ ++ __u32 rt_metric; ++ }; ++ ++ union { ++ __be32 ipv4_src; ++ __u32 ipv6_src[4]; /* in6_addr; network order */ ++ }; ++ ++ /* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in ++ * network header. output: bpf_fib_lookup sets to gateway address ++ * if FIB lookup returns gateway route ++ */ ++ union { ++ __be32 ipv4_dst; ++ __u32 ipv6_dst[4]; /* in6_addr; network order */ ++ }; ++ ++ /* output */ ++ __be16 h_vlan_proto; ++ __be16 h_vlan_TCI; ++ __u8 smac[6]; /* ETH_ALEN */ ++ __u8 dmac[6]; /* ETH_ALEN */ ++}; ++ ++enum bpf_task_fd_type { ++ BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ ++ BPF_FD_TYPE_TRACEPOINT, /* tp name */ ++ BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */ ++ BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */ ++ BPF_FD_TYPE_UPROBE, /* filename + offset */ ++ BPF_FD_TYPE_URETPROBE, /* filename + offset */ ++}; ++ + #endif /* __LINUX_BPF_H__ */ +diff --git a/include/uapi/linux/bpf_common.h b/include/uapi/linux/bpf_common.h +index afe7433..f0fe139 100644 +--- a/include/uapi/linux/bpf_common.h ++++ b/include/uapi/linux/bpf_common.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_BPF_COMMON_H__ + #define __LINUX_BPF_COMMON_H__ + +@@ -14,9 +15,10 @@ + + /* ld/ldx fields */ + #define BPF_SIZE(code) ((code) & 0x18) +-#define BPF_W 0x00 +-#define BPF_H 0x08 +-#define BPF_B 0x10 ++#define BPF_W 0x00 /* 32-bit */ ++#define BPF_H 0x08 /* 16-bit */ ++#define BPF_B 0x10 /* 8-bit */ ++/* eBPF BPF_DW 0x18 64-bit */ + #define BPF_MODE(code) ((code) & 0xe0) + #define BPF_IMM 0x00 + #define BPF_ABS 0x20 +diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h +new file mode 100644 +index 0000000..5dd580a +--- /dev/null ++++ b/include/uapi/linux/btf.h +@@ -0,0 +1,113 @@ ++/* SPDX-License-Identifier: GPL-2.0 
WITH Linux-syscall-note */ ++/* Copyright (c) 2018 Facebook */ ++#ifndef __LINUX_BTF_H__ ++#define __LINUX_BTF_H__ ++ ++#include <linux/types.h> ++ ++#define BTF_MAGIC 0xeB9F ++#define BTF_VERSION 1 ++ ++struct btf_header { ++ __u16 magic; ++ __u8 version; ++ __u8 flags; ++ __u32 hdr_len; ++ ++ /* All offsets are in bytes relative to the end of this header */ ++ __u32 type_off; /* offset of type section */ ++ __u32 type_len; /* length of type section */ ++ __u32 str_off; /* offset of string section */ ++ __u32 str_len; /* length of string section */ ++}; ++ ++/* Max # of type identifier */ ++#define BTF_MAX_TYPE 0x0000ffff ++/* Max offset into the string section */ ++#define BTF_MAX_NAME_OFFSET 0x0000ffff ++/* Max # of struct/union/enum members or func args */ ++#define BTF_MAX_VLEN 0xffff ++ ++struct btf_type { ++ __u32 name_off; ++ /* "info" bits arrangement ++ * bits 0-15: vlen (e.g. # of struct's members) ++ * bits 16-23: unused ++ * bits 24-27: kind (e.g. int, ptr, array...etc) ++ * bits 28-31: unused ++ */ ++ __u32 info; ++ /* "size" is used by INT, ENUM, STRUCT and UNION. ++ * "size" tells the size of the type it is describing. ++ * ++ * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT. ++ * "type" is a type_id referring to another type. 
++ */ ++ union { ++ __u32 size; ++ __u32 type; ++ }; ++}; ++ ++#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) ++#define BTF_INFO_VLEN(info) ((info) & 0xffff) ++ ++#define BTF_KIND_UNKN 0 /* Unknown */ ++#define BTF_KIND_INT 1 /* Integer */ ++#define BTF_KIND_PTR 2 /* Pointer */ ++#define BTF_KIND_ARRAY 3 /* Array */ ++#define BTF_KIND_STRUCT 4 /* Struct */ ++#define BTF_KIND_UNION 5 /* Union */ ++#define BTF_KIND_ENUM 6 /* Enumeration */ ++#define BTF_KIND_FWD 7 /* Forward */ ++#define BTF_KIND_TYPEDEF 8 /* Typedef */ ++#define BTF_KIND_VOLATILE 9 /* Volatile */ ++#define BTF_KIND_CONST 10 /* Const */ ++#define BTF_KIND_RESTRICT 11 /* Restrict */ ++#define BTF_KIND_MAX 11 ++#define NR_BTF_KINDS 12 ++ ++/* For some specific BTF_KIND, "struct btf_type" is immediately ++ * followed by extra data. ++ */ ++ ++/* BTF_KIND_INT is followed by a u32 and the following ++ * is the 32 bits arrangement: ++ */ ++#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24) ++#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16) ++#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff) ++ ++/* Attributes stored in the BTF_INT_ENCODING */ ++#define BTF_INT_SIGNED (1 << 0) ++#define BTF_INT_CHAR (1 << 1) ++#define BTF_INT_BOOL (1 << 2) ++ ++/* BTF_KIND_ENUM is followed by multiple "struct btf_enum". ++ * The exact number of btf_enum is stored in the vlen (of the ++ * info in "struct btf_type"). ++ */ ++struct btf_enum { ++ __u32 name_off; ++ __s32 val; ++}; ++ ++/* BTF_KIND_ARRAY is followed by one "struct btf_array" */ ++struct btf_array { ++ __u32 type; ++ __u32 index_type; ++ __u32 nelems; ++}; ++ ++/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed ++ * by multiple "struct btf_member". The exact number ++ * of btf_member is stored in the vlen (of the info in ++ * "struct btf_type"). 
++ */ ++struct btf_member { ++ __u32 name_off; ++ __u32 type; ++ __u32 offset; /* offset in bits */ ++}; ++ ++#endif /* __LINUX_BTF_H__ */ +diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h +index f7a810d..4d1ab8e 100644 +--- a/include/uapi/linux/can.h ++++ b/include/uapi/linux/can.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ + /* + * linux/can.h + * +diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h +index b9214bd..f0c5e58 100644 +--- a/include/uapi/linux/can/netlink.h ++++ b/include/uapi/linux/can/netlink.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * linux/can/netlink.h + * +@@ -131,6 +132,7 @@ enum { + IFLA_CAN_TERMINATION_CONST, + IFLA_CAN_BITRATE_CONST, + IFLA_CAN_DATA_BITRATE_CONST, ++ IFLA_CAN_BITRATE_MAX, + __IFLA_CAN_MAX + }; + +diff --git a/include/uapi/linux/can/vxcan.h b/include/uapi/linux/can/vxcan.h +index 5b29e8a..b364d77 100644 +--- a/include/uapi/linux/can/vxcan.h ++++ b/include/uapi/linux/can/vxcan.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _CAN_VXCAN_H + #define _CAN_VXCAN_H + +diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h +index a62695e..5ee0e73 100644 +--- a/include/uapi/linux/devlink.h ++++ b/include/uapi/linux/devlink.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * include/uapi/linux/devlink.h - Network physical device Netlink interface + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. +@@ -69,6 +70,24 @@ enum devlink_command { + DEVLINK_CMD_DPIPE_ENTRIES_GET, + DEVLINK_CMD_DPIPE_HEADERS_GET, + DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, ++ DEVLINK_CMD_RESOURCE_SET, ++ DEVLINK_CMD_RESOURCE_DUMP, ++ ++ /* Hot driver reload, makes configuration changes take place. The ++ * devlink instance is not released during the process. 
++ */ ++ DEVLINK_CMD_RELOAD, ++ ++ DEVLINK_CMD_PARAM_GET, /* can dump */ ++ DEVLINK_CMD_PARAM_SET, ++ DEVLINK_CMD_PARAM_NEW, ++ DEVLINK_CMD_PARAM_DEL, ++ ++ DEVLINK_CMD_REGION_GET, ++ DEVLINK_CMD_REGION_SET, ++ DEVLINK_CMD_REGION_NEW, ++ DEVLINK_CMD_REGION_DEL, ++ DEVLINK_CMD_REGION_READ, + + /* add new commands above here */ + __DEVLINK_CMD_MAX, +@@ -124,6 +143,26 @@ enum devlink_eswitch_encap_mode { + DEVLINK_ESWITCH_ENCAP_MODE_BASIC, + }; + ++enum devlink_port_flavour { ++ DEVLINK_PORT_FLAVOUR_PHYSICAL, /* Any kind of a port physically ++ * facing the user. ++ */ ++ DEVLINK_PORT_FLAVOUR_CPU, /* CPU port */ ++ DEVLINK_PORT_FLAVOUR_DSA, /* Distributed switch architecture ++ * interconnect port. ++ */ ++}; ++ ++enum devlink_param_cmode { ++ DEVLINK_PARAM_CMODE_RUNTIME, ++ DEVLINK_PARAM_CMODE_DRIVERINIT, ++ DEVLINK_PARAM_CMODE_PERMANENT, ++ ++ /* Add new configuration modes above */ ++ __DEVLINK_PARAM_CMODE_MAX, ++ DEVLINK_PARAM_CMODE_MAX = __DEVLINK_PARAM_CMODE_MAX - 1 ++}; ++ + enum devlink_attr { + /* don't change the order or add anything between, this is ABI! 
*/ + DEVLINK_ATTR_UNSPEC, +@@ -201,6 +240,45 @@ enum devlink_attr { + DEVLINK_ATTR_PAD, + + DEVLINK_ATTR_ESWITCH_ENCAP_MODE, /* u8 */ ++ DEVLINK_ATTR_RESOURCE_LIST, /* nested */ ++ DEVLINK_ATTR_RESOURCE, /* nested */ ++ DEVLINK_ATTR_RESOURCE_NAME, /* string */ ++ DEVLINK_ATTR_RESOURCE_ID, /* u64 */ ++ DEVLINK_ATTR_RESOURCE_SIZE, /* u64 */ ++ DEVLINK_ATTR_RESOURCE_SIZE_NEW, /* u64 */ ++ DEVLINK_ATTR_RESOURCE_SIZE_VALID, /* u8 */ ++ DEVLINK_ATTR_RESOURCE_SIZE_MIN, /* u64 */ ++ DEVLINK_ATTR_RESOURCE_SIZE_MAX, /* u64 */ ++ DEVLINK_ATTR_RESOURCE_SIZE_GRAN, /* u64 */ ++ DEVLINK_ATTR_RESOURCE_UNIT, /* u8 */ ++ DEVLINK_ATTR_RESOURCE_OCC, /* u64 */ ++ DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID, /* u64 */ ++ DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,/* u64 */ ++ ++ DEVLINK_ATTR_PORT_FLAVOUR, /* u16 */ ++ DEVLINK_ATTR_PORT_NUMBER, /* u32 */ ++ DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER, /* u32 */ ++ ++ DEVLINK_ATTR_PARAM, /* nested */ ++ DEVLINK_ATTR_PARAM_NAME, /* string */ ++ DEVLINK_ATTR_PARAM_GENERIC, /* flag */ ++ DEVLINK_ATTR_PARAM_TYPE, /* u8 */ ++ DEVLINK_ATTR_PARAM_VALUES_LIST, /* nested */ ++ DEVLINK_ATTR_PARAM_VALUE, /* nested */ ++ DEVLINK_ATTR_PARAM_VALUE_DATA, /* dynamic */ ++ DEVLINK_ATTR_PARAM_VALUE_CMODE, /* u8 */ ++ ++ DEVLINK_ATTR_REGION_NAME, /* string */ ++ DEVLINK_ATTR_REGION_SIZE, /* u64 */ ++ DEVLINK_ATTR_REGION_SNAPSHOTS, /* nested */ ++ DEVLINK_ATTR_REGION_SNAPSHOT, /* nested */ ++ DEVLINK_ATTR_REGION_SNAPSHOT_ID, /* u32 */ ++ ++ DEVLINK_ATTR_REGION_CHUNKS, /* nested */ ++ DEVLINK_ATTR_REGION_CHUNK, /* nested */ ++ DEVLINK_ATTR_REGION_CHUNK_DATA, /* binary */ ++ DEVLINK_ATTR_REGION_CHUNK_ADDR, /* u64 */ ++ DEVLINK_ATTR_REGION_CHUNK_LEN, /* u64 */ + + /* add new attributes above here, update the policy in devlink.c */ + +@@ -244,4 +322,8 @@ enum devlink_dpipe_header_id { + DEVLINK_DPIPE_HEADER_IPV6, + }; + ++enum devlink_resource_unit { ++ DEVLINK_RESOURCE_UNIT_ENTRY, ++}; ++ + #endif /* _LINUX_DEVLINK_H_ */ +diff --git a/include/uapi/linux/elf-em.h 
b/include/uapi/linux/elf-em.h +index 9cd1de9..31aa101 100644 +--- a/include/uapi/linux/elf-em.h ++++ b/include/uapi/linux/elf-em.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _LINUX_ELF_EM_H + #define _LINUX_ELF_EM_H + +diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h +index bbf02a6..232df14 100644 +--- a/include/uapi/linux/fib_rules.h ++++ b/include/uapi/linux/fib_rules.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_FIB_RULES_H + #define __LINUX_FIB_RULES_H + +@@ -22,7 +23,7 @@ struct fib_rule_hdr { + __u8 tos; + + __u8 table; +- __u8 res1; /* reserved */ ++ __u8 res1; /* reserved */ + __u8 res2; /* reserved */ + __u8 action; + +@@ -34,6 +35,11 @@ struct fib_rule_uid_range { + __u32 end; + }; + ++struct fib_rule_port_range { ++ __u16 start; ++ __u16 end; ++}; ++ + enum { + FRA_UNSPEC, + FRA_DST, /* destination address */ +@@ -57,6 +63,10 @@ enum { + FRA_PAD, + FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ + FRA_UID_RANGE, /* UID range */ ++ FRA_PROTOCOL, /* Originator of the rule */ ++ FRA_IP_PROTO, /* ip proto */ ++ FRA_SPORT_RANGE, /* sport */ ++ FRA_DPORT_RANGE, /* dport */ + __FRA_MAX + }; + +diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h +index e4f2f74..eaef459 100644 +--- a/include/uapi/linux/filter.h ++++ b/include/uapi/linux/filter.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * Linux Socket Filter Data Structures + */ +diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h +index 744c323..bf022c6 100644 +--- a/include/uapi/linux/fou.h ++++ b/include/uapi/linux/fou.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* fou.h - FOU Interface */ + + #ifndef _LINUX_FOU_H +diff --git a/include/uapi/linux/gen_stats.h b/include/uapi/linux/gen_stats.h +index 52deccc..24a861c 100644 +--- 
a/include/uapi/linux/gen_stats.h ++++ b/include/uapi/linux/gen_stats.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_GEN_STATS_H + #define __LINUX_GEN_STATS_H + +diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h +index 08239d8..1317119 100644 +--- a/include/uapi/linux/genetlink.h ++++ b/include/uapi/linux/genetlink.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_GENERIC_NETLINK_H + #define __LINUX_GENERIC_NETLINK_H + +diff --git a/include/uapi/linux/hdlc/ioctl.h b/include/uapi/linux/hdlc/ioctl.h +index 04bc027..0fe4238 100644 +--- a/include/uapi/linux/hdlc/ioctl.h ++++ b/include/uapi/linux/hdlc/ioctl.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __HDLC_IOCTL_H__ + #define __HDLC_IOCTL_H__ + +diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h +index a2e839e..cb247a5 100644 +--- a/include/uapi/linux/icmpv6.h ++++ b/include/uapi/linux/icmpv6.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _LINUX_ICMPV6_H + #define _LINUX_ICMPV6_H + +diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h +index b4ba020..495cdd2 100644 +--- a/include/uapi/linux/if.h ++++ b/include/uapi/linux/if.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. 
INET is implemented using the BSD Socket +diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h +index 26f0ecf..a924606 100644 +--- a/include/uapi/linux/if_addr.h ++++ b/include/uapi/linux/if_addr.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_IF_ADDR_H + #define __LINUX_IF_ADDR_H + +@@ -32,6 +33,7 @@ enum { + IFA_CACHEINFO, + IFA_MULTICAST, + IFA_FLAGS, ++ IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */ + __IFA_MAX, + }; + +diff --git a/include/uapi/linux/if_addrlabel.h b/include/uapi/linux/if_addrlabel.h +index 54580c2..d1f5974 100644 +--- a/include/uapi/linux/if_addrlabel.h ++++ b/include/uapi/linux/if_addrlabel.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * if_addrlabel.h - netlink interface for address labels + * +diff --git a/include/uapi/linux/if_alg.h b/include/uapi/linux/if_alg.h +index f2acd2f..bc2bcde 100644 +--- a/include/uapi/linux/if_alg.h ++++ b/include/uapi/linux/if_alg.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * if_alg: User-space algorithm interface + * +diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h +index 199f253..cd136a6 100644 +--- a/include/uapi/linux/if_arp.h ++++ b/include/uapi/linux/if_arp.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket +diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h +index 9635a62..61a1bf6 100644 +--- a/include/uapi/linux/if_bonding.h ++++ b/include/uapi/linux/if_bonding.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */ + /* + * Bond several ethernet interfaces into a Cisco, running 'Etherchannel'. 
+ * +diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h +index 156f443..bdfecf9 100644 +--- a/include/uapi/linux/if_bridge.h ++++ b/include/uapi/linux/if_bridge.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * Linux ethernet bridge + * +diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h +index 7dde037..8c36f63 100644 +--- a/include/uapi/linux/if_ether.h ++++ b/include/uapi/linux/if_ether.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket +@@ -29,6 +30,7 @@ + */ + + #define ETH_ALEN 6 /* Octets in one ethernet addr */ ++#define ETH_TLEN 2 /* Octets in ethernet type field */ + #define ETH_HLEN 14 /* Total octets in header. */ + #define ETH_ZLEN 60 /* Min. octets in frame sans FCS */ + #define ETH_DATA_LEN 1500 /* Max. octets in payload */ +@@ -46,6 +48,7 @@ + #define ETH_P_PUP 0x0200 /* Xerox PUP packet */ + #define ETH_P_PUPAT 0x0201 /* Xerox PUP Addr Trans packet */ + #define ETH_P_TSN 0x22F0 /* TSN (IEEE 1722) packet */ ++#define ETH_P_ERSPAN2 0x22EB /* ERSPAN version 2 (type III) */ + #define ETH_P_IP 0x0800 /* Internet Protocol packet */ + #define ETH_P_X25 0x0805 /* CCITT X.25 */ + #define ETH_P_ARP 0x0806 /* Address Resolution packet */ +@@ -86,6 +89,7 @@ + #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ + #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ + #define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */ ++#define ETH_P_PREAUTH 0x88C7 /* 802.11 Preauthentication */ + #define ETH_P_TIPC 0x88CA /* TIPC */ + #define ETH_P_MACSEC 0x88E5 /* 802.1ae MACsec */ + #define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */ +@@ -148,11 +152,18 @@ + * This is an Ethernet frame header. + */ + ++/* allow libcs like musl to deactivate this, glibc does not implement this. 
*/ ++#ifndef __UAPI_DEF_ETHHDR ++#define __UAPI_DEF_ETHHDR 1 ++#endif ++ ++#if __UAPI_DEF_ETHHDR + struct ethhdr { + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + unsigned char h_source[ETH_ALEN]; /* source ether addr */ + __be16 h_proto; /* packet type ID field */ + } __attribute__((packed)); ++#endif + + + #endif /* _LINUX_IF_ETHER_H */ +diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h +index 1f97d05..1c64ed4 100644 +--- a/include/uapi/linux/if_link.h ++++ b/include/uapi/linux/if_link.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _LINUX_IF_LINK_H + #define _LINUX_IF_LINK_H + +@@ -158,6 +159,11 @@ enum { + IFLA_PAD, + IFLA_XDP, + IFLA_EVENT, ++ IFLA_NEW_NETNSID, ++ IFLA_IF_NETNSID, ++ IFLA_CARRIER_UP_COUNT, ++ IFLA_CARRIER_DOWN_COUNT, ++ IFLA_NEW_IFINDEX, + __IFLA_MAX + }; + +@@ -323,6 +329,9 @@ enum { + IFLA_BRPORT_MCAST_TO_UCAST, + IFLA_BRPORT_VLAN_TUNNEL, + IFLA_BRPORT_BCAST_FLOOD, ++ IFLA_BRPORT_GROUP_FWD_MASK, ++ IFLA_BRPORT_NEIGH_SUPPRESS, ++ IFLA_BRPORT_ISOLATED, + __IFLA_BRPORT_MAX + }; + #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) +@@ -460,6 +469,7 @@ enum macsec_validation_type { + enum { + IFLA_IPVLAN_UNSPEC, + IFLA_IPVLAN_MODE, ++ IFLA_IPVLAN_FLAGS, + __IFLA_IPVLAN_MAX + }; + +@@ -472,6 +482,9 @@ enum ipvlan_mode { + IPVLAN_MODE_MAX + }; + ++#define IPVLAN_F_PRIVATE 0x01 ++#define IPVLAN_F_VEPA 0x02 ++ + /* VXLAN section */ + enum { + IFLA_VXLAN_UNSPEC, +@@ -502,6 +515,7 @@ enum { + IFLA_VXLAN_COLLECT_METADATA, + IFLA_VXLAN_LABEL, + IFLA_VXLAN_GPE, ++ IFLA_VXLAN_TTL_INHERIT, + __IFLA_VXLAN_MAX + }; + #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) +@@ -721,6 +735,8 @@ enum { + IFLA_VF_STATS_BROADCAST, + IFLA_VF_STATS_MULTICAST, + IFLA_VF_STATS_PAD, ++ IFLA_VF_STATS_RX_DROPPED, ++ IFLA_VF_STATS_TX_DROPPED, + __IFLA_VF_STATS_MAX, + }; + +@@ -902,6 +918,7 @@ enum { + XDP_ATTACHED_DRV, + XDP_ATTACHED_SKB, + XDP_ATTACHED_HW, ++ XDP_ATTACHED_MULTI, + }; + + enum { +@@ 
-910,6 +927,9 @@ enum { + IFLA_XDP_ATTACHED, + IFLA_XDP_FLAGS, + IFLA_XDP_PROG_ID, ++ IFLA_XDP_DRV_PROG_ID, ++ IFLA_XDP_SKB_PROG_ID, ++ IFLA_XDP_HW_PROG_ID, + __IFLA_XDP_MAX, + }; + +@@ -925,4 +945,43 @@ enum { + IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */ + }; + ++/* tun section */ ++ ++enum { ++ IFLA_TUN_UNSPEC, ++ IFLA_TUN_OWNER, ++ IFLA_TUN_GROUP, ++ IFLA_TUN_TYPE, ++ IFLA_TUN_PI, ++ IFLA_TUN_VNET_HDR, ++ IFLA_TUN_PERSIST, ++ IFLA_TUN_MULTI_QUEUE, ++ IFLA_TUN_NUM_QUEUES, ++ IFLA_TUN_NUM_DISABLED_QUEUES, ++ __IFLA_TUN_MAX, ++}; ++ ++#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1) ++ ++/* rmnet section */ ++ ++#define RMNET_FLAGS_INGRESS_DEAGGREGATION (1U << 0) ++#define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1) ++#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) ++#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) ++ ++enum { ++ IFLA_RMNET_UNSPEC, ++ IFLA_RMNET_MUX_ID, ++ IFLA_RMNET_FLAGS, ++ __IFLA_RMNET_MAX, ++}; ++ ++#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1) ++ ++struct ifla_rmnet_flags { ++ __u32 flags; ++ __u32 mask; ++}; ++ + #endif /* _LINUX_IF_LINK_H */ +diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h +index 22939a3..7743993 100644 +--- a/include/uapi/linux/if_macsec.h ++++ b/include/uapi/linux/if_macsec.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * include/uapi/linux/if_macsec.h - MACsec device + * +@@ -21,8 +22,13 @@ + + #define MACSEC_KEYID_LEN 16 + +-#define MACSEC_DEFAULT_CIPHER_ID 0x0080020001000001ULL +-#define MACSEC_DEFAULT_CIPHER_ALT 0x0080C20001000001ULL ++/* cipher IDs as per IEEE802.1AEbn-2011 */ ++#define MACSEC_CIPHER_ID_GCM_AES_128 0x0080C20001000001ULL ++#define MACSEC_CIPHER_ID_GCM_AES_256 0x0080C20001000002ULL ++ ++/* deprecated cipher ID for GCM-AES-128 */ ++#define MACSEC_DEFAULT_CIPHER_ID 0x0080020001000001ULL ++#define MACSEC_DEFAULT_CIPHER_ALT MACSEC_CIPHER_ID_GCM_AES_128 + + #define MACSEC_MIN_ICV_LEN 8 + #define MACSEC_MAX_ICV_LEN 32 +diff 
--git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h +index 4df96a7..67b61d9 100644 +--- a/include/uapi/linux/if_packet.h ++++ b/include/uapi/linux/if_packet.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_IF_PACKET_H + #define __LINUX_IF_PACKET_H + +diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h +index d5ecb42..be9b744 100644 +--- a/include/uapi/linux/if_tun.h ++++ b/include/uapi/linux/if_tun.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * Universal TUN/TAP device driver. + * Copyright (C) 1999-2000 Maxim Krasnyansky +@@ -56,10 +57,14 @@ + */ + #define TUNSETVNETBE _IOW('T', 222, int) + #define TUNGETVNETBE _IOR('T', 223, int) ++#define TUNSETSTEERINGEBPF _IOR('T', 224, int) ++#define TUNSETFILTEREBPF _IOR('T', 225, int) + + /* TUNSETIFF ifr flags */ + #define IFF_TUN 0x0001 + #define IFF_TAP 0x0002 ++#define IFF_NAPI 0x0010 ++#define IFF_NAPI_FRAGS 0x0020 + #define IFF_NO_PI 0x1000 + /* This flag has no real effect */ + #define IFF_ONE_QUEUE 0x2000 +diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h +index 21834ca..ecdc766 100644 +--- a/include/uapi/linux/if_tunnel.h ++++ b/include/uapi/linux/if_tunnel.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _IF_TUNNEL_H_ + #define _IF_TUNNEL_H_ + +@@ -84,6 +85,7 @@ enum tunnel_encap_types { + TUNNEL_ENCAP_NONE, + TUNNEL_ENCAP_FOU, + TUNNEL_ENCAP_GUE, ++ TUNNEL_ENCAP_MPLS, + }; + + #define TUNNEL_ENCAP_FLAG_CSUM (1<<0) +@@ -135,6 +137,9 @@ enum { + IFLA_GRE_IGNORE_DF, + IFLA_GRE_FWMARK, + IFLA_GRE_ERSPAN_INDEX, ++ IFLA_GRE_ERSPAN_VER, ++ IFLA_GRE_ERSPAN_DIR, ++ IFLA_GRE_ERSPAN_HWID, + __IFLA_GRE_MAX, + }; + +diff --git a/include/uapi/linux/if_vlan.h b/include/uapi/linux/if_vlan.h +index 24ae007..18a15da 100644 +--- a/include/uapi/linux/if_vlan.h ++++ b/include/uapi/linux/if_vlan.h +@@ -1,3 +1,4 @@ ++/* 
SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * VLAN An implementation of 802.1Q VLAN tagging. + * +diff --git a/include/uapi/linux/ife.h b/include/uapi/linux/ife.h +index 2954da3..bdd953c 100644 +--- a/include/uapi/linux/ife.h ++++ b/include/uapi/linux/ife.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __UAPI_IFE_H + #define __UAPI_IFE_H + +diff --git a/include/uapi/linux/ila.h b/include/uapi/linux/ila.h +index 7e328d7..6a6c97c 100644 +--- a/include/uapi/linux/ila.h ++++ b/include/uapi/linux/ila.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* ila.h - ILA Interface */ + + #ifndef _LINUX_ILA_H +@@ -16,6 +17,8 @@ enum { + ILA_ATTR_DIR, /* u32 */ + ILA_ATTR_PAD, + ILA_ATTR_CSUM_MODE, /* u8 */ ++ ILA_ATTR_IDENT_TYPE, /* u8 */ ++ ILA_ATTR_HOOK_TYPE, /* u8 */ + + __ILA_ATTR_MAX, + }; +@@ -27,6 +30,7 @@ enum { + ILA_CMD_ADD, + ILA_CMD_DEL, + ILA_CMD_GET, ++ ILA_CMD_FLUSH, + + __ILA_CMD_MAX, + }; +@@ -40,6 +44,25 @@ enum { + ILA_CSUM_ADJUST_TRANSPORT, + ILA_CSUM_NEUTRAL_MAP, + ILA_CSUM_NO_ACTION, ++ ILA_CSUM_NEUTRAL_MAP_AUTO, ++}; ++ ++enum { ++ ILA_ATYPE_IID = 0, ++ ILA_ATYPE_LUID, ++ ILA_ATYPE_VIRT_V4, ++ ILA_ATYPE_VIRT_UNI_V6, ++ ILA_ATYPE_VIRT_MULTI_V6, ++ ILA_ATYPE_NONLOCAL_ADDR, ++ ILA_ATYPE_RSVD_1, ++ ILA_ATYPE_RSVD_2, ++ ++ ILA_ATYPE_USE_FORMAT = 32, /* Get type from type field in identifier */ ++}; ++ ++enum { ++ ILA_HOOK_ROUTE_OUTPUT, ++ ILA_HOOK_ROUTE_INPUT, + }; + + #endif /* _LINUX_ILA_H */ +diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h +index 9439efa..a4f143b 100644 +--- a/include/uapi/linux/in.h ++++ b/include/uapi/linux/in.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. 
INET is implemented using the BSD Socket +diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h +index 6f3bdee..409bb3f 100644 +--- a/include/uapi/linux/in6.h ++++ b/include/uapi/linux/in6.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * Types and definitions for AF_INET6 + * Linux INET6 implementation +@@ -284,6 +285,7 @@ struct in6_flowlabel_req { + #define IPV6_TRANSPARENT 75 + #define IPV6_UNICAST_IF 76 + #define IPV6_RECVFRAGSIZE 77 ++#define IPV6_FREEBIND 78 + + /* + * Multicast Routing: +diff --git a/include/uapi/linux/in_route.h b/include/uapi/linux/in_route.h +index b261b8c..0cc2c23 100644 +--- a/include/uapi/linux/in_route.h ++++ b/include/uapi/linux/in_route.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _LINUX_IN_ROUTE_H + #define _LINUX_IN_ROUTE_H + +diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h +index bada4d7..f98d82d 100644 +--- a/include/uapi/linux/inet_diag.h ++++ b/include/uapi/linux/inet_diag.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _INET_DIAG_H_ + #define _INET_DIAG_H_ + +@@ -91,6 +92,8 @@ enum { + INET_DIAG_BC_D_COND, + INET_DIAG_BC_DEV_COND, /* u32 ifindex */ + INET_DIAG_BC_MARK_COND, ++ INET_DIAG_BC_S_EQ, ++ INET_DIAG_BC_D_EQ, + }; + + struct inet_diag_hostcond { +diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h +index 1907284..883fd33 100644 +--- a/include/uapi/linux/ip.h ++++ b/include/uapi/linux/ip.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. 
INET is implemented using the BSD Socket +diff --git a/include/uapi/linux/ip6_tunnel.h b/include/uapi/linux/ip6_tunnel.h +index 425926c..0245269 100644 +--- a/include/uapi/linux/ip6_tunnel.h ++++ b/include/uapi/linux/ip6_tunnel.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _IP6_TUNNEL_H + #define _IP6_TUNNEL_H + +@@ -20,6 +21,8 @@ + #define IP6_TNL_F_RCV_DSCP_COPY 0x10 + /* copy fwmark from inner packet */ + #define IP6_TNL_F_USE_ORIG_FWMARK 0x20 ++/* allow remote endpoint on the local node */ ++#define IP6_TNL_F_ALLOW_LOCAL_REMOTE 0x40 + + struct ip6_tnl_parm { + char name[IFNAMSIZ]; /* name of tunnel device */ +diff --git a/include/uapi/linux/ipsec.h b/include/uapi/linux/ipsec.h +index d17a630..50d8ee1 100644 +--- a/include/uapi/linux/ipsec.h ++++ b/include/uapi/linux/ipsec.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _LINUX_IPSEC_H + #define _LINUX_IPSEC_H + +diff --git a/include/uapi/linux/kernel.h b/include/uapi/linux/kernel.h +index 527549f..d99ffa1 100644 +--- a/include/uapi/linux/kernel.h ++++ b/include/uapi/linux/kernel.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _LINUX_KERNEL_H + #define _LINUX_KERNEL_H + +diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h +index 8a80007..1fe52a7 100644 +--- a/include/uapi/linux/l2tp.h ++++ b/include/uapi/linux/l2tp.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * L2TP-over-IP socket for L2TPv3. + * +@@ -64,7 +65,7 @@ struct sockaddr_l2tpip6 { + * TUNNEL_MODIFY - CONN_ID, udpcsum + * TUNNEL_GETSTATS - CONN_ID, (stats) + * TUNNEL_GET - CONN_ID, (...) 
+- * SESSION_CREATE - SESSION_ID, PW_TYPE, offset, data_seq, cookie, peer_cookie, offset, l2spec ++ * SESSION_CREATE - SESSION_ID, PW_TYPE, data_seq, cookie, peer_cookie, l2spec + * SESSION_DELETE - SESSION_ID + * SESSION_MODIFY - SESSION_ID, data_seq + * SESSION_GET - SESSION_ID, (...) +@@ -93,10 +94,10 @@ enum { + L2TP_ATTR_NONE, /* no data */ + L2TP_ATTR_PW_TYPE, /* u16, enum l2tp_pwtype */ + L2TP_ATTR_ENCAP_TYPE, /* u16, enum l2tp_encap_type */ +- L2TP_ATTR_OFFSET, /* u16 */ ++ L2TP_ATTR_OFFSET, /* u16 (not used) */ + L2TP_ATTR_DATA_SEQ, /* u16 */ + L2TP_ATTR_L2SPEC_TYPE, /* u8, enum l2tp_l2spec_type */ +- L2TP_ATTR_L2SPEC_LEN, /* u8, enum l2tp_l2spec_type */ ++ L2TP_ATTR_L2SPEC_LEN, /* u8 (not used) */ + L2TP_ATTR_PROTO_VERSION, /* u8 */ + L2TP_ATTR_IFNAME, /* string */ + L2TP_ATTR_CONN_ID, /* u32 */ +diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h +index f38571d..a159991 100644 +--- a/include/uapi/linux/libc-compat.h ++++ b/include/uapi/linux/libc-compat.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * Compatibility interface for userspace libc header coordination: + * +@@ -167,46 +168,99 @@ + + /* If we did not see any headers from any supported C libraries, + * or we are being included in the kernel, then define everything +- * that we need. */ ++ * that we need. Check for previous __UAPI_* definitions to give ++ * unsupported C libraries a way to opt out of any kernel definition. 
*/ + #else /* !defined(__GLIBC__) */ + + /* Definitions for if.h */ ++#ifndef __UAPI_DEF_IF_IFCONF + #define __UAPI_DEF_IF_IFCONF 1 ++#endif ++#ifndef __UAPI_DEF_IF_IFMAP + #define __UAPI_DEF_IF_IFMAP 1 ++#endif ++#ifndef __UAPI_DEF_IF_IFNAMSIZ + #define __UAPI_DEF_IF_IFNAMSIZ 1 ++#endif ++#ifndef __UAPI_DEF_IF_IFREQ + #define __UAPI_DEF_IF_IFREQ 1 ++#endif + /* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */ ++#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS + #define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1 ++#endif + /* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */ ++#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO + #define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1 ++#endif + + /* Definitions for in.h */ ++#ifndef __UAPI_DEF_IN_ADDR + #define __UAPI_DEF_IN_ADDR 1 ++#endif ++#ifndef __UAPI_DEF_IN_IPPROTO + #define __UAPI_DEF_IN_IPPROTO 1 ++#endif ++#ifndef __UAPI_DEF_IN_PKTINFO + #define __UAPI_DEF_IN_PKTINFO 1 ++#endif ++#ifndef __UAPI_DEF_IP_MREQ + #define __UAPI_DEF_IP_MREQ 1 ++#endif ++#ifndef __UAPI_DEF_SOCKADDR_IN + #define __UAPI_DEF_SOCKADDR_IN 1 ++#endif ++#ifndef __UAPI_DEF_IN_CLASS + #define __UAPI_DEF_IN_CLASS 1 ++#endif + + /* Definitions for in6.h */ ++#ifndef __UAPI_DEF_IN6_ADDR + #define __UAPI_DEF_IN6_ADDR 1 ++#endif ++#ifndef __UAPI_DEF_IN6_ADDR_ALT + #define __UAPI_DEF_IN6_ADDR_ALT 1 ++#endif ++#ifndef __UAPI_DEF_SOCKADDR_IN6 + #define __UAPI_DEF_SOCKADDR_IN6 1 ++#endif ++#ifndef __UAPI_DEF_IPV6_MREQ + #define __UAPI_DEF_IPV6_MREQ 1 ++#endif ++#ifndef __UAPI_DEF_IPPROTO_V6 + #define __UAPI_DEF_IPPROTO_V6 1 ++#endif ++#ifndef __UAPI_DEF_IPV6_OPTIONS + #define __UAPI_DEF_IPV6_OPTIONS 1 ++#endif ++#ifndef __UAPI_DEF_IN6_PKTINFO + #define __UAPI_DEF_IN6_PKTINFO 1 ++#endif ++#ifndef __UAPI_DEF_IP6_MTUINFO + #define __UAPI_DEF_IP6_MTUINFO 1 ++#endif + + /* Definitions for ipx.h */ ++#ifndef __UAPI_DEF_SOCKADDR_IPX + #define __UAPI_DEF_SOCKADDR_IPX 1 ++#endif ++#ifndef __UAPI_DEF_IPX_ROUTE_DEFINITION 
+ #define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 ++#endif ++#ifndef __UAPI_DEF_IPX_INTERFACE_DEFINITION + #define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1 ++#endif ++#ifndef __UAPI_DEF_IPX_CONFIG_DATA + #define __UAPI_DEF_IPX_CONFIG_DATA 1 ++#endif ++#ifndef __UAPI_DEF_IPX_ROUTE_DEF + #define __UAPI_DEF_IPX_ROUTE_DEF 1 ++#endif + + /* Definitions for xattr.h */ ++#ifndef __UAPI_DEF_XATTR + #define __UAPI_DEF_XATTR 1 ++#endif + + #endif /* __GLIBC__ */ + +diff --git a/include/uapi/linux/limits.h b/include/uapi/linux/limits.h +index 2d0f941..c3547f0 100644 +--- a/include/uapi/linux/limits.h ++++ b/include/uapi/linux/limits.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _LINUX_LIMITS_H + #define _LINUX_LIMITS_H + +diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h +index 3298426..3f3fe6f 100644 +--- a/include/uapi/linux/lwtunnel.h ++++ b/include/uapi/linux/lwtunnel.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _LWTUNNEL_H_ + #define _LWTUNNEL_H_ + +diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h +index e439565..1a6fee9 100644 +--- a/include/uapi/linux/magic.h ++++ b/include/uapi/linux/magic.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_MAGIC_H__ + #define __LINUX_MAGIC_H__ + +@@ -46,6 +47,7 @@ + #define OPENPROM_SUPER_MAGIC 0x9fa1 + #define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */ + #define QNX6_SUPER_MAGIC 0x68191122 /* qnx6 fs detection */ ++#define AFS_FS_MAGIC 0x6B414653 + + #define REISERFS_SUPER_MAGIC 0x52654973 /* used by gcc */ + /* used by file system utilities that +diff --git a/include/uapi/linux/mpls.h b/include/uapi/linux/mpls.h +index bf5b625..9effbf9 100644 +--- a/include/uapi/linux/mpls.h ++++ b/include/uapi/linux/mpls.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _MPLS_H + #define _MPLS_H + +diff --git 
a/include/uapi/linux/mpls_iptunnel.h b/include/uapi/linux/mpls_iptunnel.h +index 1a0e57b..2c69b7d 100644 +--- a/include/uapi/linux/mpls_iptunnel.h ++++ b/include/uapi/linux/mpls_iptunnel.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * mpls tunnel api + * +diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h +index 3199d28..904db61 100644 +--- a/include/uapi/linux/neighbour.h ++++ b/include/uapi/linux/neighbour.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_NEIGHBOUR_H + #define __LINUX_NEIGHBOUR_H + +diff --git a/include/uapi/linux/net_namespace.h b/include/uapi/linux/net_namespace.h +index 9a92b7e..6d64d07 100644 +--- a/include/uapi/linux/net_namespace.h ++++ b/include/uapi/linux/net_namespace.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* Copyright (c) 2015 6WIND S.A. + * Author: Nicolas Dichtel + * +diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h +index 4afbd7d..86ac1eb 100644 +--- a/include/uapi/linux/netconf.h ++++ b/include/uapi/linux/netconf.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _LINUX_NETCONF_H_ + #define _LINUX_NETCONF_H_ + +diff --git a/include/uapi/linux/netdevice.h b/include/uapi/linux/netdevice.h +index 66fceb4..86d961c 100644 +--- a/include/uapi/linux/netdevice.h ++++ b/include/uapi/linux/netdevice.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. 
INET is implemented using the BSD Socket +diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h +index ff4a4a5..36378a0 100644 +--- a/include/uapi/linux/netfilter.h ++++ b/include/uapi/linux/netfilter.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_NETFILTER_H + #define __LINUX_NETFILTER_H + +diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h +index a6c96b0..13eeada 100644 +--- a/include/uapi/linux/netfilter/ipset/ip_set.h ++++ b/include/uapi/linux/netfilter/ipset/ip_set.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* Copyright (C) 2000-2002 Joakim Axelsson + * Patrick Schaaf + * Martin Josefsson +diff --git a/include/uapi/linux/netfilter/x_tables.h b/include/uapi/linux/netfilter/x_tables.h +index 4120970..ae2fd12 100644 +--- a/include/uapi/linux/netfilter/x_tables.h ++++ b/include/uapi/linux/netfilter/x_tables.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _X_TABLES_H + #define _X_TABLES_H + #include +diff --git a/include/uapi/linux/netfilter/xt_set.h b/include/uapi/linux/netfilter/xt_set.h +index d4e0234..8c1ca66 100644 +--- a/include/uapi/linux/netfilter/xt_set.h ++++ b/include/uapi/linux/netfilter/xt_set.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _XT_SET_H + #define _XT_SET_H + +diff --git a/include/uapi/linux/netfilter/xt_tcpudp.h b/include/uapi/linux/netfilter/xt_tcpudp.h +index 38aa7b3..658c169 100644 +--- a/include/uapi/linux/netfilter/xt_tcpudp.h ++++ b/include/uapi/linux/netfilter/xt_tcpudp.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _XT_TCPUDP_H + #define _XT_TCPUDP_H + +diff --git a/include/uapi/linux/netfilter_ipv4.h b/include/uapi/linux/netfilter_ipv4.h +index a5f4dc7..074e2c8 100644 +--- a/include/uapi/linux/netfilter_ipv4.h ++++ 
b/include/uapi/linux/netfilter_ipv4.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* IPv4-specific defines for netfilter. + * (C)1998 Rusty Russell -- This code is GPL. + */ +@@ -54,6 +55,7 @@ + + enum nf_ip_hook_priorities { + NF_IP_PRI_FIRST = INT_MIN, ++ NF_IP_PRI_RAW_BEFORE_DEFRAG = -450, + NF_IP_PRI_CONNTRACK_DEFRAG = -400, + NF_IP_PRI_RAW = -300, + NF_IP_PRI_SELINUX_FIRST = -225, +diff --git a/include/uapi/linux/netfilter_ipv4/ip_tables.h b/include/uapi/linux/netfilter_ipv4/ip_tables.h +index 456fb86..409cff7 100644 +--- a/include/uapi/linux/netfilter_ipv4/ip_tables.h ++++ b/include/uapi/linux/netfilter_ipv4/ip_tables.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * 25-Jul-1998 Major changes to allow for ip chain table + * +diff --git a/include/uapi/linux/netfilter_ipv6.h b/include/uapi/linux/netfilter_ipv6.h +index 8483d1d..92701fe 100644 +--- a/include/uapi/linux/netfilter_ipv6.h ++++ b/include/uapi/linux/netfilter_ipv6.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* IPv6-specific defines for netfilter. + * (C)1998 Rusty Russell -- This code is GPL. 
+ * (C)1999 David Jeffery +@@ -59,6 +60,7 @@ + + enum nf_ip6_hook_priorities { + NF_IP6_PRI_FIRST = INT_MIN, ++ NF_IP6_PRI_RAW_BEFORE_DEFRAG = -450, + NF_IP6_PRI_CONNTRACK_DEFRAG = -400, + NF_IP6_PRI_RAW = -300, + NF_IP6_PRI_SELINUX_FIRST = -225, +diff --git a/include/uapi/linux/netfilter_ipv6/ip6_tables.h b/include/uapi/linux/netfilter_ipv6/ip6_tables.h +index fcc8cca..7ae314b 100644 +--- a/include/uapi/linux/netfilter_ipv6/ip6_tables.h ++++ b/include/uapi/linux/netfilter_ipv6/ip6_tables.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * 25-Jul-1998 Major changes to allow for ip chain table + * +diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h +index ec0690b..0b2c29b 100644 +--- a/include/uapi/linux/netlink.h ++++ b/include/uapi/linux/netlink.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_NETLINK_H + #define __LINUX_NETLINK_H + +diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h +index c8c8c7d..4cd0657 100644 +--- a/include/uapi/linux/netlink_diag.h ++++ b/include/uapi/linux/netlink_diag.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __NETLINK_DIAG_H__ + #define __NETLINK_DIAG_H__ + +diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h +index 0c5d5dd..349ddf0 100644 +--- a/include/uapi/linux/packet_diag.h ++++ b/include/uapi/linux/packet_diag.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __PACKET_DIAG_H__ + #define __PACKET_DIAG_H__ + +diff --git a/include/uapi/linux/param.h b/include/uapi/linux/param.h +index 092e92f..94e0c57 100644 +--- a/include/uapi/linux/param.h ++++ b/include/uapi/linux/param.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _LINUX_PARAM_H + #define _LINUX_PARAM_H + +diff --git a/include/uapi/linux/pfkeyv2.h 
b/include/uapi/linux/pfkeyv2.h +index ada7f01..d65b117 100644 +--- a/include/uapi/linux/pfkeyv2.h ++++ b/include/uapi/linux/pfkeyv2.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* PF_KEY user interface, this is defined by rfc2367 so + * do not make arbitrary modifications or else this header + * file will not be compliant. +diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h +index d5e2bf6..b451225 100644 +--- a/include/uapi/linux/pkt_cls.h ++++ b/include/uapi/linux/pkt_cls.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_PKT_CLS_H + #define __LINUX_PKT_CLS_H + +@@ -128,6 +129,7 @@ enum { + #define TCA_CLS_FLAGS_SKIP_SW (1 << 1) /* don't use filter in SW */ + #define TCA_CLS_FLAGS_IN_HW (1 << 2) /* filter is offloaded to HW */ + #define TCA_CLS_FLAGS_NOT_IN_HW (1 << 3) /* filter isn't offloaded to HW */ ++#define TCA_CLS_FLAGS_VERBOSE (1 << 4) /* verbose logging */ + + /* U32 filters */ + +@@ -467,6 +469,15 @@ enum { + TCA_FLOWER_KEY_IP_TTL, /* u8 */ + TCA_FLOWER_KEY_IP_TTL_MASK, /* u8 */ + ++ TCA_FLOWER_KEY_CVLAN_ID, /* be16 */ ++ TCA_FLOWER_KEY_CVLAN_PRIO, /* u8 */ ++ TCA_FLOWER_KEY_CVLAN_ETH_TYPE, /* be16 */ ++ ++ TCA_FLOWER_KEY_ENC_IP_TOS, /* u8 */ ++ TCA_FLOWER_KEY_ENC_IP_TOS_MASK, /* u8 */ ++ TCA_FLOWER_KEY_ENC_IP_TTL, /* u8 */ ++ TCA_FLOWER_KEY_ENC_IP_TTL_MASK, /* u8 */ ++ + __TCA_FLOWER_MAX, + }; + +@@ -474,6 +485,7 @@ enum { + + enum { + TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), ++ TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), + }; + + /* Match-all classifier */ +@@ -554,7 +566,8 @@ enum { + #define TCF_EM_VLAN 6 + #define TCF_EM_CANID 7 + #define TCF_EM_IPSET 8 +-#define TCF_EM_MAX 8 ++#define TCF_EM_IPT 9 ++#define TCF_EM_MAX 9 + + enum { + TCF_EM_PROG_TC +diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h +index 099bf55..d9cc9dc 100644 +--- a/include/uapi/linux/pkt_sched.h ++++ b/include/uapi/linux/pkt_sched.h 
+@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_PKT_SCHED_H + #define __LINUX_PKT_SCHED_H + +@@ -74,6 +75,7 @@ struct tc_estimator { + #define TC_H_INGRESS (0xFFFFFFF1U) + #define TC_H_CLSACT TC_H_INGRESS + ++#define TC_H_MIN_PRIORITY 0xFFE0U + #define TC_H_MIN_INGRESS 0xFFF2U + #define TC_H_MIN_EGRESS 0xFFF3U + +@@ -534,6 +536,10 @@ enum { + TCA_NETEM_ECN, + TCA_NETEM_RATE64, + TCA_NETEM_PAD, ++ TCA_NETEM_LATENCY64, ++ TCA_NETEM_JITTER64, ++ TCA_NETEM_SLOT, ++ TCA_NETEM_SLOT_DIST, + __TCA_NETEM_MAX, + }; + +@@ -571,6 +577,15 @@ struct tc_netem_rate { + __s32 cell_overhead; + }; + ++struct tc_netem_slot { ++ __s64 min_delay; /* nsec */ ++ __s64 max_delay; ++ __s32 max_packets; ++ __s32 max_bytes; ++ __s64 dist_delay; /* nsec */ ++ __s64 dist_jitter; /* nsec */ ++}; ++ + enum { + NETEM_LOSS_UNSPEC, + NETEM_LOSS_GI, /* General Intuitive - 4 state model */ +@@ -625,6 +640,22 @@ enum { + + #define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1) + ++enum { ++ TC_MQPRIO_MODE_DCB, ++ TC_MQPRIO_MODE_CHANNEL, ++ __TC_MQPRIO_MODE_MAX ++}; ++ ++#define __TC_MQPRIO_MODE_MAX (__TC_MQPRIO_MODE_MAX - 1) ++ ++enum { ++ TC_MQPRIO_SHAPER_DCB, ++ TC_MQPRIO_SHAPER_BW_RATE, /* Add new shapers below */ ++ __TC_MQPRIO_SHAPER_MAX ++}; ++ ++#define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1) ++ + struct tc_mqprio_qopt { + __u8 num_tc; + __u8 prio_tc_map[TC_QOPT_BITMASK + 1]; +@@ -633,6 +664,22 @@ struct tc_mqprio_qopt { + __u16 offset[TC_QOPT_MAX_QUEUE]; + }; + ++#define TC_MQPRIO_F_MODE 0x1 ++#define TC_MQPRIO_F_SHAPER 0x2 ++#define TC_MQPRIO_F_MIN_RATE 0x4 ++#define TC_MQPRIO_F_MAX_RATE 0x8 ++ ++enum { ++ TCA_MQPRIO_UNSPEC, ++ TCA_MQPRIO_MODE, ++ TCA_MQPRIO_SHAPER, ++ TCA_MQPRIO_MIN_RATE64, ++ TCA_MQPRIO_MAX_RATE64, ++ __TCA_MQPRIO_MAX, ++}; ++ ++#define TCA_MQPRIO_MAX (__TCA_MQPRIO_MAX - 1) ++ + /* SFB */ + + enum { +@@ -871,4 +918,155 @@ struct tc_pie_xstats { + __u32 maxq; /* maximum queue size */ + __u32 ecn_mark; 
/* packets marked with ecn*/ + }; ++ ++/* CBS */ ++struct tc_cbs_qopt { ++ __u8 offload; ++ __u8 _pad[3]; ++ __s32 hicredit; ++ __s32 locredit; ++ __s32 idleslope; ++ __s32 sendslope; ++}; ++ ++enum { ++ TCA_CBS_UNSPEC, ++ TCA_CBS_PARMS, ++ __TCA_CBS_MAX, ++}; ++ ++#define TCA_CBS_MAX (__TCA_CBS_MAX - 1) ++ ++ ++/* ETF */ ++struct tc_etf_qopt { ++ __s32 delta; ++ __s32 clockid; ++ __u32 flags; ++#define TC_ETF_DEADLINE_MODE_ON BIT(0) ++#define TC_ETF_OFFLOAD_ON BIT(1) ++}; ++ ++enum { ++ TCA_ETF_UNSPEC, ++ TCA_ETF_PARMS, ++ __TCA_ETF_MAX, ++}; ++ ++#define TCA_ETF_MAX (__TCA_ETF_MAX - 1) ++ ++ ++/* CAKE */ ++enum { ++ TCA_CAKE_UNSPEC, ++ TCA_CAKE_PAD, ++ TCA_CAKE_BASE_RATE64, ++ TCA_CAKE_DIFFSERV_MODE, ++ TCA_CAKE_ATM, ++ TCA_CAKE_FLOW_MODE, ++ TCA_CAKE_OVERHEAD, ++ TCA_CAKE_RTT, ++ TCA_CAKE_TARGET, ++ TCA_CAKE_AUTORATE, ++ TCA_CAKE_MEMORY, ++ TCA_CAKE_NAT, ++ TCA_CAKE_RAW, ++ TCA_CAKE_WASH, ++ TCA_CAKE_MPU, ++ TCA_CAKE_INGRESS, ++ TCA_CAKE_ACK_FILTER, ++ TCA_CAKE_SPLIT_GSO, ++ __TCA_CAKE_MAX ++}; ++#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1) ++ ++enum { ++ __TCA_CAKE_STATS_INVALID, ++ TCA_CAKE_STATS_PAD, ++ TCA_CAKE_STATS_CAPACITY_ESTIMATE64, ++ TCA_CAKE_STATS_MEMORY_LIMIT, ++ TCA_CAKE_STATS_MEMORY_USED, ++ TCA_CAKE_STATS_AVG_NETOFF, ++ TCA_CAKE_STATS_MIN_NETLEN, ++ TCA_CAKE_STATS_MAX_NETLEN, ++ TCA_CAKE_STATS_MIN_ADJLEN, ++ TCA_CAKE_STATS_MAX_ADJLEN, ++ TCA_CAKE_STATS_TIN_STATS, ++ TCA_CAKE_STATS_DEFICIT, ++ TCA_CAKE_STATS_COBALT_COUNT, ++ TCA_CAKE_STATS_DROPPING, ++ TCA_CAKE_STATS_DROP_NEXT_US, ++ TCA_CAKE_STATS_P_DROP, ++ TCA_CAKE_STATS_BLUE_TIMER_US, ++ __TCA_CAKE_STATS_MAX ++}; ++#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1) ++ ++enum { ++ __TCA_CAKE_TIN_STATS_INVALID, ++ TCA_CAKE_TIN_STATS_PAD, ++ TCA_CAKE_TIN_STATS_SENT_PACKETS, ++ TCA_CAKE_TIN_STATS_SENT_BYTES64, ++ TCA_CAKE_TIN_STATS_DROPPED_PACKETS, ++ TCA_CAKE_TIN_STATS_DROPPED_BYTES64, ++ TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS, ++ TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64, ++ 
TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS, ++ TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64, ++ TCA_CAKE_TIN_STATS_BACKLOG_PACKETS, ++ TCA_CAKE_TIN_STATS_BACKLOG_BYTES, ++ TCA_CAKE_TIN_STATS_THRESHOLD_RATE64, ++ TCA_CAKE_TIN_STATS_TARGET_US, ++ TCA_CAKE_TIN_STATS_INTERVAL_US, ++ TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS, ++ TCA_CAKE_TIN_STATS_WAY_MISSES, ++ TCA_CAKE_TIN_STATS_WAY_COLLISIONS, ++ TCA_CAKE_TIN_STATS_PEAK_DELAY_US, ++ TCA_CAKE_TIN_STATS_AVG_DELAY_US, ++ TCA_CAKE_TIN_STATS_BASE_DELAY_US, ++ TCA_CAKE_TIN_STATS_SPARSE_FLOWS, ++ TCA_CAKE_TIN_STATS_BULK_FLOWS, ++ TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS, ++ TCA_CAKE_TIN_STATS_MAX_SKBLEN, ++ TCA_CAKE_TIN_STATS_FLOW_QUANTUM, ++ __TCA_CAKE_TIN_STATS_MAX ++}; ++#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1) ++#define TC_CAKE_MAX_TINS (8) ++ ++enum { ++ CAKE_FLOW_NONE = 0, ++ CAKE_FLOW_SRC_IP, ++ CAKE_FLOW_DST_IP, ++ CAKE_FLOW_HOSTS, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */ ++ CAKE_FLOW_FLOWS, ++ CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */ ++ CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */ ++ CAKE_FLOW_TRIPLE, /* = CAKE_FLOW_HOSTS | CAKE_FLOW_FLOWS */ ++ CAKE_FLOW_MAX, ++}; ++ ++enum { ++ CAKE_DIFFSERV_DIFFSERV3 = 0, ++ CAKE_DIFFSERV_DIFFSERV4, ++ CAKE_DIFFSERV_DIFFSERV8, ++ CAKE_DIFFSERV_BESTEFFORT, ++ CAKE_DIFFSERV_PRECEDENCE, ++ CAKE_DIFFSERV_MAX ++}; ++ ++enum { ++ CAKE_ACK_NONE = 0, ++ CAKE_ACK_FILTER, ++ CAKE_ACK_AGGRESSIVE, ++ CAKE_ACK_MAX ++}; ++ ++enum { ++ CAKE_ATM_NONE = 0, ++ CAKE_ATM_ATM, ++ CAKE_ATM_PTM, ++ CAKE_ATM_MAX ++}; ++ + #endif +diff --git a/include/uapi/linux/posix_types.h b/include/uapi/linux/posix_types.h +index 988f76e..9a7a740 100644 +--- a/include/uapi/linux/posix_types.h ++++ b/include/uapi/linux/posix_types.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _LINUX_POSIX_TYPES_H + #define _LINUX_POSIX_TYPES_H + +diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h +index 
813e9e0..c3a7d8e 100644 +--- a/include/uapi/linux/rtnetlink.h ++++ b/include/uapi/linux/rtnetlink.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_RTNETLINK_H + #define __LINUX_RTNETLINK_H + +@@ -253,6 +254,11 @@ enum { + #define RTPROT_DHCP 16 /* DHCP client */ + #define RTPROT_MROUTED 17 /* Multicast daemon */ + #define RTPROT_BABEL 42 /* Babel daemon */ ++#define RTPROT_BGP 186 /* BGP Routes */ ++#define RTPROT_ISIS 187 /* ISIS Routes */ ++#define RTPROT_OSPF 188 /* OSPF Routes */ ++#define RTPROT_RIP 189 /* RIP Routes */ ++#define RTPROT_EIGRP 192 /* EIGRP Routes */ + + /* rtm_scope + +@@ -326,6 +332,9 @@ enum rtattr_type_t { + RTA_PAD, + RTA_UID, + RTA_TTL_PROPAGATE, ++ RTA_IP_PROTO, ++ RTA_SPORT, ++ RTA_DPORT, + __RTA_MAX + }; + +@@ -430,6 +439,8 @@ enum { + #define RTAX_QUICKACK RTAX_QUICKACK + RTAX_CC_ALGO, + #define RTAX_CC_ALGO RTAX_CC_ALGO ++ RTAX_FASTOPEN_NO_COOKIE, ++#define RTAX_FASTOPEN_NO_COOKIE RTAX_FASTOPEN_NO_COOKIE + __RTAX_MAX + }; + +@@ -538,9 +549,19 @@ struct tcmsg { + int tcm_ifindex; + __u32 tcm_handle; + __u32 tcm_parent; ++/* tcm_block_index is used instead of tcm_parent ++ * in case tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK ++ */ ++#define tcm_block_index tcm_parent + __u32 tcm_info; + }; + ++/* For manipulation of filters in shared block, tcm_ifindex is set to ++ * TCM_IFINDEX_MAGIC_BLOCK, and tcm_parent is aliased to tcm_block_index ++ * which is the block index. 
++ */ ++#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU) ++ + enum { + TCA_UNSPEC, + TCA_KIND, +@@ -554,6 +575,9 @@ enum { + TCA_PAD, + TCA_DUMP_INVISIBLE, + TCA_CHAIN, ++ TCA_HW_OFFLOAD, ++ TCA_INGRESS_BLOCK, ++ TCA_EGRESS_BLOCK, + __TCA_MAX + }; + +diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h +index fec24c4..dd164d7 100644 +--- a/include/uapi/linux/sctp.h ++++ b/include/uapi/linux/sctp.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* SCTP kernel implementation + * (C) Copyright IBM Corp. 2001, 2004 + * Copyright (c) 1999-2000 Cisco, Inc. +@@ -98,6 +99,8 @@ typedef __s32 sctp_assoc_t; + #define SCTP_RECVRCVINFO 32 + #define SCTP_RECVNXTINFO 33 + #define SCTP_DEFAULT_SNDINFO 34 ++#define SCTP_AUTH_DEACTIVATE_KEY 35 ++#define SCTP_REUSE_PORT 36 + + /* Internal Socket Options. Some of the sctp library functions are + * implemented using these socket options. +@@ -122,6 +125,10 @@ typedef __s32 sctp_assoc_t; + #define SCTP_RESET_ASSOC 120 + #define SCTP_ADD_STREAMS 121 + #define SCTP_SOCKOPT_PEELOFF_FLAGS 122 ++#define SCTP_STREAM_SCHEDULER 123 ++#define SCTP_STREAM_SCHEDULER_VALUE 124 ++#define SCTP_INTERLEAVING_SUPPORTED 125 ++#define SCTP_SENDMSG_CONNECT 126 + + /* PR-SCTP policies */ + #define SCTP_PR_SCTP_NONE 0x0000 +@@ -256,6 +263,31 @@ struct sctp_nxtinfo { + sctp_assoc_t nxt_assoc_id; + }; + ++/* 5.3.7 SCTP PR-SCTP Information Structure (SCTP_PRINFO) ++ * ++ * This cmsghdr structure specifies SCTP options for sendmsg(). ++ * ++ * cmsg_level cmsg_type cmsg_data[] ++ * ------------ ------------ ------------------- ++ * IPPROTO_SCTP SCTP_PRINFO struct sctp_prinfo ++ */ ++struct sctp_prinfo { ++ __u16 pr_policy; ++ __u32 pr_value; ++}; ++ ++/* 5.3.8 SCTP AUTH Information Structure (SCTP_AUTHINFO) ++ * ++ * This cmsghdr structure specifies SCTP options for sendmsg(). 
++ * ++ * cmsg_level cmsg_type cmsg_data[] ++ * ------------ ------------ ------------------- ++ * IPPROTO_SCTP SCTP_AUTHINFO struct sctp_authinfo ++ */ ++struct sctp_authinfo { ++ __u16 auth_keynumber; ++}; ++ + /* + * sinfo_flags: 16 bits (unsigned integer) + * +@@ -267,6 +299,8 @@ enum sctp_sinfo_flags { + SCTP_ADDR_OVER = (1 << 1), /* Override the primary destination. */ + SCTP_ABORT = (1 << 2), /* Send an ABORT message to the peer. */ + SCTP_SACK_IMMEDIATELY = (1 << 3), /* SACK should be sent without delay. */ ++ /* 2 bits here have been used by SCTP_PR_SCTP_MASK */ ++ SCTP_SENDALL = (1 << 6), + SCTP_NOTIFICATION = MSG_NOTIFICATION, /* Next message is not user msg but notification. */ + SCTP_EOF = MSG_FIN, /* Initiate graceful shutdown process. */ + }; +@@ -289,6 +323,14 @@ typedef enum sctp_cmsg_type { + #define SCTP_RCVINFO SCTP_RCVINFO + SCTP_NXTINFO, /* 5.3.6 SCTP Next Receive Information Structure */ + #define SCTP_NXTINFO SCTP_NXTINFO ++ SCTP_PRINFO, /* 5.3.7 SCTP PR-SCTP Information Structure */ ++#define SCTP_PRINFO SCTP_PRINFO ++ SCTP_AUTHINFO, /* 5.3.8 SCTP AUTH Information Structure */ ++#define SCTP_AUTHINFO SCTP_AUTHINFO ++ SCTP_DSTADDRV4, /* 5.3.9 SCTP Destination IPv4 Address Structure */ ++#define SCTP_DSTADDRV4 SCTP_DSTADDRV4 ++ SCTP_DSTADDRV6, /* 5.3.10 SCTP Destination IPv6 Address Structure */ ++#define SCTP_DSTADDRV6 SCTP_DSTADDRV6 + } sctp_cmsg_t; + + /* +@@ -376,7 +418,7 @@ struct sctp_remote_error { + __u16 sre_type; + __u16 sre_flags; + __u32 sre_length; +- __u16 sre_error; ++ __be16 sre_error; + sctp_assoc_t sre_assoc_id; + __u8 sre_data[0]; + }; +@@ -456,6 +498,8 @@ struct sctp_pdapi_event { + __u32 pdapi_length; + __u32 pdapi_indication; + sctp_assoc_t pdapi_assoc_id; ++ __u32 pdapi_stream; ++ __u32 pdapi_seq; + }; + + enum { SCTP_PARTIAL_DELIVERY_ABORTED=0, }; +@@ -476,7 +520,12 @@ struct sctp_authkey_event { + sctp_assoc_t auth_assoc_id; + }; + +-enum { SCTP_AUTH_NEWKEY = 0, }; ++enum { ++ SCTP_AUTH_NEW_KEY, ++#define 
SCTP_AUTH_NEWKEY SCTP_AUTH_NEW_KEY /* compatible with before */ ++ SCTP_AUTH_FREE_KEY, ++ SCTP_AUTH_NO_AUTH, ++}; + + /* + * 6.1.9. SCTP_SENDER_DRY_EVENT +@@ -714,6 +763,8 @@ enum sctp_spp_flags { + SPP_SACKDELAY_DISABLE = 1<<6, /*Disable SACK*/ + SPP_SACKDELAY = SPP_SACKDELAY_ENABLE | SPP_SACKDELAY_DISABLE, + SPP_HB_TIME_IS_ZERO = 1<<7, /* Set HB delay to 0 */ ++ SPP_IPV6_FLOWLABEL = 1<<8, ++ SPP_DSCP = 1<<9, + }; + + struct sctp_paddrparams { +@@ -724,6 +775,8 @@ struct sctp_paddrparams { + __u32 spp_pathmtu; + __u32 spp_sackdelay; + __u32 spp_flags; ++ __u32 spp_ipv6_flowlabel; ++ __u8 spp_dscp; + } __attribute__((packed, aligned(4))); + + /* +@@ -812,6 +865,12 @@ struct sctp_assoc_value { + uint32_t assoc_value; + }; + ++struct sctp_stream_value { ++ sctp_assoc_t assoc_id; ++ uint16_t stream_id; ++ uint16_t stream_value; ++}; ++ + /* + * 7.2.2 Peer Address Information + * +@@ -1082,4 +1141,12 @@ struct sctp_add_streams { + uint16_t sas_outstrms; + }; + ++/* SCTP Stream schedulers */ ++enum sctp_sched_type { ++ SCTP_SS_FCFS, ++ SCTP_SS_PRIO, ++ SCTP_SS_RR, ++ SCTP_SS_MAX = SCTP_SS_RR ++}; ++ + #endif /* _SCTP_H */ +diff --git a/include/uapi/linux/seg6.h b/include/uapi/linux/seg6.h +index 0715279..329163e 100644 +--- a/include/uapi/linux/seg6.h ++++ b/include/uapi/linux/seg6.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * SR-IPv6 implementation + * +@@ -25,9 +26,9 @@ struct ipv6_sr_hdr { + __u8 hdrlen; + __u8 type; + __u8 segments_left; +- __u8 first_segment; ++ __u8 first_segment; /* Represents the last_entry field of SRH */ + __u8 flags; +- __u16 reserved; ++ __u16 tag; + + struct in6_addr segments[0]; + }; +diff --git a/include/uapi/linux/seg6_genl.h b/include/uapi/linux/seg6_genl.h +index 99382f9..0c23052 100644 +--- a/include/uapi/linux/seg6_genl.h ++++ b/include/uapi/linux/seg6_genl.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _LINUX_SEG6_GENL_H + #define 
_LINUX_SEG6_GENL_H + +diff --git a/include/uapi/linux/seg6_hmac.h b/include/uapi/linux/seg6_hmac.h +index 704f93e..3fb3412 100644 +--- a/include/uapi/linux/seg6_hmac.h ++++ b/include/uapi/linux/seg6_hmac.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _LINUX_SEG6_HMAC_H + #define _LINUX_SEG6_HMAC_H + +diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h +index a5dc05a..3004e98 100644 +--- a/include/uapi/linux/seg6_iptunnel.h ++++ b/include/uapi/linux/seg6_iptunnel.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * SR-IPv6 implementation + * +diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h +index 76b90d6..5312de8 100644 +--- a/include/uapi/linux/seg6_local.h ++++ b/include/uapi/linux/seg6_local.h +@@ -25,6 +25,7 @@ enum { + SEG6_LOCAL_NH6, + SEG6_LOCAL_IIF, + SEG6_LOCAL_OIF, ++ SEG6_LOCAL_BPF, + __SEG6_LOCAL_MAX, + }; + #define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1) +@@ -59,10 +60,21 @@ enum { + SEG6_LOCAL_ACTION_END_AS = 13, + /* forward to SR-unaware VNF with masquerading */ + SEG6_LOCAL_ACTION_END_AM = 14, ++ /* custom BPF action */ ++ SEG6_LOCAL_ACTION_END_BPF = 15, + + __SEG6_LOCAL_ACTION_MAX, + }; + + #define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1) + ++enum { ++ SEG6_LOCAL_BPF_PROG_UNSPEC, ++ SEG6_LOCAL_BPF_PROG, ++ SEG6_LOCAL_BPF_PROG_NAME, ++ __SEG6_LOCAL_BPF_PROG_MAX, ++}; ++ ++#define SEG6_LOCAL_BPF_PROG_MAX (__SEG6_LOCAL_BPF_PROG_MAX - 1) ++ + #endif +diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h +index 901231e..a69cf20 100644 +--- a/include/uapi/linux/sock_diag.h ++++ b/include/uapi/linux/sock_diag.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __SOCK_DIAG_H__ + #define __SOCK_DIAG_H__ + +diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h +index 8c1e501..268b948 100644 +--- 
a/include/uapi/linux/socket.h ++++ b/include/uapi/linux/socket.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _LINUX_SOCKET_H + #define _LINUX_SOCKET_H + +diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h +index 79d029d..d393e9e 100644 +--- a/include/uapi/linux/sockios.h ++++ b/include/uapi/linux/sockios.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket +diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h +index 4bb69de..23e025f 100644 +--- a/include/uapi/linux/stddef.h ++++ b/include/uapi/linux/stddef.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + + + #ifndef __always_inline +diff --git a/include/uapi/linux/sysinfo.h b/include/uapi/linux/sysinfo.h +index 934335a..435d5c2 100644 +--- a/include/uapi/linux/sysinfo.h ++++ b/include/uapi/linux/sysinfo.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _LINUX_SYSINFO_H + #define _LINUX_SYSINFO_H + +diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h +index 8dc2ac0..6e89a5d 100644 +--- a/include/uapi/linux/tc_act/tc_bpf.h ++++ b/include/uapi/linux/tc_act/tc_bpf.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * Copyright (c) 2015 Jiri Pirko + * +diff --git a/include/uapi/linux/tc_act/tc_connmark.h b/include/uapi/linux/tc_act/tc_connmark.h +index 62a5e94..80caa47 100644 +--- a/include/uapi/linux/tc_act/tc_connmark.h ++++ b/include/uapi/linux/tc_act/tc_connmark.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __UAPI_TC_CONNMARK_H + #define __UAPI_TC_CONNMARK_H + +diff --git a/include/uapi/linux/tc_act/tc_csum.h b/include/uapi/linux/tc_act/tc_csum.h +index 
a11bb35..0ecf4d2 100644 +--- a/include/uapi/linux/tc_act/tc_csum.h ++++ b/include/uapi/linux/tc_act/tc_csum.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_TC_CSUM_H + #define __LINUX_TC_CSUM_H + +diff --git a/include/uapi/linux/tc_act/tc_defact.h b/include/uapi/linux/tc_act/tc_defact.h +index d2a3abb..e3ecd8b 100644 +--- a/include/uapi/linux/tc_act/tc_defact.h ++++ b/include/uapi/linux/tc_act/tc_defact.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_TC_DEF_H + #define __LINUX_TC_DEF_H + +diff --git a/include/uapi/linux/tc_act/tc_gact.h b/include/uapi/linux/tc_act/tc_gact.h +index 70b536a..94273c3 100644 +--- a/include/uapi/linux/tc_act/tc_gact.h ++++ b/include/uapi/linux/tc_act/tc_gact.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_TC_GACT_H + #define __LINUX_TC_GACT_H + +diff --git a/include/uapi/linux/tc_act/tc_ife.h b/include/uapi/linux/tc_act/tc_ife.h +index 7c28178..2f48490 100644 +--- a/include/uapi/linux/tc_act/tc_ife.h ++++ b/include/uapi/linux/tc_act/tc_ife.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __UAPI_TC_IFE_H + #define __UAPI_TC_IFE_H + +diff --git a/include/uapi/linux/tc_act/tc_ipt.h b/include/uapi/linux/tc_act/tc_ipt.h +index 7c6e155..b743c8b 100644 +--- a/include/uapi/linux/tc_act/tc_ipt.h ++++ b/include/uapi/linux/tc_act/tc_ipt.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_TC_IPT_H + #define __LINUX_TC_IPT_H + +diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h +index 3d7a2b3..5dd671c 100644 +--- a/include/uapi/linux/tc_act/tc_mirred.h ++++ b/include/uapi/linux/tc_act/tc_mirred.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_TC_MIR_H + #define __LINUX_TC_MIR_H + +@@ -9,13 +10,13 @@ + 
#define TCA_EGRESS_MIRROR 2 /* mirror packet to EGRESS */ + #define TCA_INGRESS_REDIR 3 /* packet redirect to INGRESS*/ + #define TCA_INGRESS_MIRROR 4 /* mirror packet to INGRESS */ +- ++ + struct tc_mirred { + tc_gen; + int eaction; /* one of IN/EGRESS_MIRROR/REDIR */ + __u32 ifindex; /* ifindex of egress port */ + }; +- ++ + enum { + TCA_MIRRED_UNSPEC, + TCA_MIRRED_TM, +@@ -24,5 +25,5 @@ enum { + __TCA_MIRRED_MAX + }; + #define TCA_MIRRED_MAX (__TCA_MIRRED_MAX - 1) +- ++ + #endif +diff --git a/include/uapi/linux/tc_act/tc_nat.h b/include/uapi/linux/tc_act/tc_nat.h +index 923457c..086be84 100644 +--- a/include/uapi/linux/tc_act/tc_nat.h ++++ b/include/uapi/linux/tc_act/tc_nat.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_TC_NAT_H + #define __LINUX_TC_NAT_H + +diff --git a/include/uapi/linux/tc_act/tc_pedit.h b/include/uapi/linux/tc_act/tc_pedit.h +index 143d2b3..24ec792 100644 +--- a/include/uapi/linux/tc_act/tc_pedit.h ++++ b/include/uapi/linux/tc_act/tc_pedit.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_TC_PED_H + #define __LINUX_TC_PED_H + +@@ -16,13 +17,15 @@ enum { + TCA_PEDIT_KEY_EX, + __TCA_PEDIT_MAX + }; ++ + #define TCA_PEDIT_MAX (__TCA_PEDIT_MAX - 1) +- ++ + enum { + TCA_PEDIT_KEY_EX_HTYPE = 1, + TCA_PEDIT_KEY_EX_CMD = 2, + __TCA_PEDIT_KEY_EX_MAX + }; ++ + #define TCA_PEDIT_KEY_EX_MAX (__TCA_PEDIT_KEY_EX_MAX - 1) + + /* TCA_PEDIT_KEY_EX_HDR_TYPE_NETWROK is a special case for legacy users. 
It +@@ -37,6 +40,7 @@ enum pedit_header_type { + TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5, + __PEDIT_HDR_TYPE_MAX, + }; ++ + #define TCA_PEDIT_HDR_TYPE_MAX (__PEDIT_HDR_TYPE_MAX - 1) + + enum pedit_cmd { +@@ -44,6 +48,7 @@ enum pedit_cmd { + TCA_PEDIT_KEY_EX_CMD_ADD = 1, + __PEDIT_CMD_MAX, + }; ++ + #define TCA_PEDIT_CMD_MAX (__PEDIT_CMD_MAX - 1) + + struct tc_pedit_key { +@@ -54,13 +59,14 @@ struct tc_pedit_key { + __u32 offmask; + __u32 shift; + }; +- ++ + struct tc_pedit_sel { + tc_gen; + unsigned char nkeys; + unsigned char flags; + struct tc_pedit_key keys[0]; + }; ++ + #define tc_pedit tc_pedit_sel + + #endif +diff --git a/include/uapi/linux/tc_act/tc_sample.h b/include/uapi/linux/tc_act/tc_sample.h +index edc9058..bd7e9f0 100644 +--- a/include/uapi/linux/tc_act/tc_sample.h ++++ b/include/uapi/linux/tc_act/tc_sample.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_TC_SAMPLE_H + #define __LINUX_TC_SAMPLE_H + +diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h +index 2884425..6de6071 100644 +--- a/include/uapi/linux/tc_act/tc_skbedit.h ++++ b/include/uapi/linux/tc_act/tc_skbedit.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * Copyright (c) 2008, Intel Corporation. 
+ * +@@ -29,6 +30,7 @@ + #define SKBEDIT_F_MARK 0x4 + #define SKBEDIT_F_PTYPE 0x8 + #define SKBEDIT_F_MASK 0x10 ++#define SKBEDIT_F_INHERITDSFIELD 0x20 + + struct tc_skbedit { + tc_gen; +@@ -44,6 +46,7 @@ enum { + TCA_SKBEDIT_PAD, + TCA_SKBEDIT_PTYPE, + TCA_SKBEDIT_MASK, ++ TCA_SKBEDIT_FLAGS, + __TCA_SKBEDIT_MAX + }; + #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1) +diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h +index 10fc07d..38c072f 100644 +--- a/include/uapi/linux/tc_act/tc_skbmod.h ++++ b/include/uapi/linux/tc_act/tc_skbmod.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * Copyright (c) 2016, Jamal Hadi Salim + * +diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h +index afcd4be..be384d6 100644 +--- a/include/uapi/linux/tc_act/tc_tunnel_key.h ++++ b/include/uapi/linux/tc_act/tc_tunnel_key.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * Copyright (c) 2016, Amir Vadai + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. 
+@@ -35,9 +36,37 @@ enum { + TCA_TUNNEL_KEY_PAD, + TCA_TUNNEL_KEY_ENC_DST_PORT, /* be16 */ + TCA_TUNNEL_KEY_NO_CSUM, /* u8 */ ++ TCA_TUNNEL_KEY_ENC_OPTS, /* Nested TCA_TUNNEL_KEY_ENC_OPTS_ ++ * attributes ++ */ ++ TCA_TUNNEL_KEY_ENC_TOS, /* u8 */ ++ TCA_TUNNEL_KEY_ENC_TTL, /* u8 */ + __TCA_TUNNEL_KEY_MAX, + }; + + #define TCA_TUNNEL_KEY_MAX (__TCA_TUNNEL_KEY_MAX - 1) + ++enum { ++ TCA_TUNNEL_KEY_ENC_OPTS_UNSPEC, ++ TCA_TUNNEL_KEY_ENC_OPTS_GENEVE, /* Nested ++ * TCA_TUNNEL_KEY_ENC_OPTS_ ++ * attributes ++ */ ++ __TCA_TUNNEL_KEY_ENC_OPTS_MAX, ++}; ++ ++#define TCA_TUNNEL_KEY_ENC_OPTS_MAX (__TCA_TUNNEL_KEY_ENC_OPTS_MAX - 1) ++ ++enum { ++ TCA_TUNNEL_KEY_ENC_OPT_GENEVE_UNSPEC, ++ TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS, /* be16 */ ++ TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE, /* u8 */ ++ TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA, /* 4 to 128 bytes */ ++ ++ __TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX, ++}; ++ ++#define TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX \ ++ (__TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX - 1) ++ + #endif +diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h +index bddb272..0d7b5fd 100644 +--- a/include/uapi/linux/tc_act/tc_vlan.h ++++ b/include/uapi/linux/tc_act/tc_vlan.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * Copyright (c) 2014 Jiri Pirko + * +diff --git a/include/uapi/linux/tc_ematch/tc_em_cmp.h b/include/uapi/linux/tc_ematch/tc_em_cmp.h +index f34bb1b..2549d9d 100644 +--- a/include/uapi/linux/tc_ematch/tc_em_cmp.h ++++ b/include/uapi/linux/tc_ematch/tc_em_cmp.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_TC_EM_CMP_H + #define __LINUX_TC_EM_CMP_H + +diff --git a/include/uapi/linux/tc_ematch/tc_em_ipt.h b/include/uapi/linux/tc_ematch/tc_em_ipt.h +new file mode 100644 +index 0000000..49a6553 +--- /dev/null ++++ b/include/uapi/linux/tc_ematch/tc_em_ipt.h +@@ -0,0 +1,20 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef 
__LINUX_TC_EM_IPT_H ++#define __LINUX_TC_EM_IPT_H ++ ++#include <linux/types.h> ++#include <linux/pkt_cls.h> ++ ++enum { ++ TCA_EM_IPT_UNSPEC, ++ TCA_EM_IPT_HOOK, ++ TCA_EM_IPT_MATCH_NAME, ++ TCA_EM_IPT_MATCH_REVISION, ++ TCA_EM_IPT_NFPROTO, ++ TCA_EM_IPT_MATCH_DATA, ++ __TCA_EM_IPT_MAX ++}; ++ ++#define TCA_EM_IPT_MAX (__TCA_EM_IPT_MAX - 1) ++ ++#endif +diff --git a/include/uapi/linux/tc_ematch/tc_em_meta.h b/include/uapi/linux/tc_ematch/tc_em_meta.h +index b11f8ce..cf30b5b 100644 +--- a/include/uapi/linux/tc_ematch/tc_em_meta.h ++++ b/include/uapi/linux/tc_ematch/tc_em_meta.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_TC_EM_META_H + #define __LINUX_TC_EM_META_H + +diff --git a/include/uapi/linux/tc_ematch/tc_em_nbyte.h b/include/uapi/linux/tc_ematch/tc_em_nbyte.h +index 7172cfb..c76333f 100644 +--- a/include/uapi/linux/tc_ematch/tc_em_nbyte.h ++++ b/include/uapi/linux/tc_ematch/tc_em_nbyte.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __LINUX_TC_EM_NBYTE_H + #define __LINUX_TC_EM_NBYTE_H + +diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h +index 8edad3f..2e766cf 100644 +--- a/include/uapi/linux/tcp.h ++++ b/include/uapi/linux/tcp.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + /* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. 
INET is implemented using the BSD Socket +@@ -119,6 +120,12 @@ enum { + #define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */ + #define TCP_ULP 31 /* Attach a ULP to a TCP connection */ + #define TCP_MD5SIG_EXT 32 /* TCP MD5 Signature with extensions */ ++#define TCP_FASTOPEN_KEY 33 /* Set the key for Fast Open (cookie) */ ++#define TCP_FASTOPEN_NO_COOKIE 34 /* Enable TFO without a TFO cookie */ ++#define TCP_ZEROCOPY_RECEIVE 35 ++#define TCP_INQ 36 /* Notify bytes available to read as a cmsg on read */ ++ ++#define TCP_CM_INQ TCP_INQ + + struct tcp_repair_opt { + __u32 opt_code; +@@ -221,6 +228,9 @@ struct tcp_info { + __u64 tcpi_busy_time; /* Time (usec) busy sending data */ + __u64 tcpi_rwnd_limited; /* Time (usec) limited by receive window */ + __u64 tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */ ++ ++ __u32 tcpi_delivered; ++ __u32 tcpi_delivered_ce; + }; + + /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ +@@ -238,6 +248,11 @@ enum { + TCP_NLA_MIN_RTT, /* minimum RTT */ + TCP_NLA_RECUR_RETRANS, /* Recurring retransmits for the current pkt */ + TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */ ++ TCP_NLA_SNDQ_SIZE, /* Data (bytes) pending in send queue */ ++ TCP_NLA_CA_STATE, /* ca_state of socket */ ++ TCP_NLA_SND_SSTHRESH, /* Slow start size threshold */ ++ TCP_NLA_DELIVERED, /* Data pkts delivered incl. out-of-order */ ++ TCP_NLA_DELIVERED_CE, /* Like above but only ones w/ CE marks */ + + }; + +@@ -265,4 +280,11 @@ struct tcp_diag_md5sig { + __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; + }; + ++/* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) 
*/ ++ ++struct tcp_zerocopy_receive { ++ __u64 address; /* in: address of mapping */ ++ __u32 length; /* in/out: number of bytes to map/mapped */ ++ __u32 recv_skip_hint; /* out: amount of bytes to skip */ ++}; + #endif /* _LINUX_TCP_H */ +diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h +index 80ad90d..7cb4a17 100644 +--- a/include/uapi/linux/tcp_metrics.h ++++ b/include/uapi/linux/tcp_metrics.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* tcp_metrics.h - TCP Metrics Interface */ + + #ifndef _LINUX_TCP_METRICS_H +diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h +index 924fb5c..7a166a0 100644 +--- a/include/uapi/linux/tipc.h ++++ b/include/uapi/linux/tipc.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ + /* + * include/uapi/linux/tipc.h: Header for TIPC socket interface + * +@@ -44,82 +45,38 @@ + * TIPC addressing primitives + */ + +-struct tipc_portid { ++struct tipc_socket_addr { + __u32 ref; + __u32 node; + }; + +-struct tipc_name { ++struct tipc_service_addr { + __u32 type; + __u32 instance; + }; + +-struct tipc_name_seq { ++struct tipc_service_range { + __u32 type; + __u32 lower; + __u32 upper; + }; + +-/* TIPC Address Size, Offset, Mask specification for Z.C.N +- */ +-#define TIPC_NODE_BITS 12 +-#define TIPC_CLUSTER_BITS 12 +-#define TIPC_ZONE_BITS 8 +- +-#define TIPC_NODE_OFFSET 0 +-#define TIPC_CLUSTER_OFFSET TIPC_NODE_BITS +-#define TIPC_ZONE_OFFSET (TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS) +- +-#define TIPC_NODE_SIZE ((1UL << TIPC_NODE_BITS) - 1) +-#define TIPC_CLUSTER_SIZE ((1UL << TIPC_CLUSTER_BITS) - 1) +-#define TIPC_ZONE_SIZE ((1UL << TIPC_ZONE_BITS) - 1) +- +-#define TIPC_NODE_MASK (TIPC_NODE_SIZE << TIPC_NODE_OFFSET) +-#define TIPC_CLUSTER_MASK (TIPC_CLUSTER_SIZE << TIPC_CLUSTER_OFFSET) +-#define TIPC_ZONE_MASK (TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET) +- +-#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | 
TIPC_CLUSTER_MASK) +- +-static __inline__ __u32 tipc_addr(unsigned int zone, +- unsigned int cluster, +- unsigned int node) +-{ +- return (zone << TIPC_ZONE_OFFSET) | +- (cluster << TIPC_CLUSTER_OFFSET) | +- node; +-} +- +-static __inline__ unsigned int tipc_zone(__u32 addr) +-{ +- return addr >> TIPC_ZONE_OFFSET; +-} +- +-static __inline__ unsigned int tipc_cluster(__u32 addr) +-{ +- return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET; +-} +- +-static __inline__ unsigned int tipc_node(__u32 addr) +-{ +- return addr & TIPC_NODE_MASK; +-} +- + /* +- * Application-accessible port name types ++ * Application-accessible service types + */ + +-#define TIPC_CFG_SRV 0 /* configuration service name type */ +-#define TIPC_TOP_SRV 1 /* topology service name type */ +-#define TIPC_LINK_STATE 2 /* link state name type */ +-#define TIPC_RESERVED_TYPES 64 /* lowest user-publishable name type */ ++#define TIPC_NODE_STATE 0 /* node state service type */ ++#define TIPC_TOP_SRV 1 /* topology server service type */ ++#define TIPC_LINK_STATE 2 /* link state service type */ ++#define TIPC_RESERVED_TYPES 64 /* lowest user-allowed service type */ + + /* +- * Publication scopes when binding port names and port name sequences ++ * Publication scopes when binding service / service range + */ +- +-#define TIPC_ZONE_SCOPE 1 +-#define TIPC_CLUSTER_SCOPE 2 +-#define TIPC_NODE_SCOPE 3 ++enum tipc_scope { ++ TIPC_CLUSTER_SCOPE = 2, /* 0 can also be used */ ++ TIPC_NODE_SCOPE = 3 ++}; + + /* + * Limiting values for messages +@@ -151,28 +108,28 @@ static __inline__ unsigned int tipc_node(__u32 addr) + * TIPC topology subscription service definitions + */ + +-#define TIPC_SUB_PORTS 0x01 /* filter for port availability */ +-#define TIPC_SUB_SERVICE 0x02 /* filter for service availability */ +-#define TIPC_SUB_CANCEL 0x04 /* cancel a subscription */ ++#define TIPC_SUB_PORTS 0x01 /* filter: evt at each match */ ++#define TIPC_SUB_SERVICE 0x02 /* filter: evt at first up/last down */ ++#define 
TIPC_SUB_CANCEL 0x04 /* filter: cancel a subscription */ + + #define TIPC_WAIT_FOREVER (~0) /* timeout for permanent subscription */ + + struct tipc_subscr { +- struct tipc_name_seq seq; /* name sequence of interest */ ++ struct tipc_service_range seq; /* range of interest */ + __u32 timeout; /* subscription duration (in ms) */ + __u32 filter; /* bitmask of filter options */ + char usr_handle[8]; /* available for subscriber use */ + }; + + #define TIPC_PUBLISHED 1 /* publication event */ +-#define TIPC_WITHDRAWN 2 /* withdraw event */ ++#define TIPC_WITHDRAWN 2 /* withdrawal event */ + #define TIPC_SUBSCR_TIMEOUT 3 /* subscription timeout event */ + + struct tipc_event { + __u32 event; /* event type */ +- __u32 found_lower; /* matching name seq instances */ +- __u32 found_upper; /* " " " " */ +- struct tipc_portid port; /* associated port */ ++ __u32 found_lower; /* matching range */ ++ __u32 found_upper; /* " " */ ++ struct tipc_socket_addr port; /* associated socket */ + struct tipc_subscr s; /* associated subscription */ + }; + +@@ -192,20 +149,20 @@ struct tipc_event { + #define SOL_TIPC 271 + #endif + +-#define TIPC_ADDR_NAMESEQ 1 +-#define TIPC_ADDR_MCAST 1 +-#define TIPC_ADDR_NAME 2 +-#define TIPC_ADDR_ID 3 ++#define TIPC_ADDR_MCAST 1 ++#define TIPC_SERVICE_RANGE 1 ++#define TIPC_SERVICE_ADDR 2 ++#define TIPC_SOCKET_ADDR 3 + + struct sockaddr_tipc { + unsigned short family; + unsigned char addrtype; + signed char scope; + union { +- struct tipc_portid id; +- struct tipc_name_seq nameseq; ++ struct tipc_socket_addr id; ++ struct tipc_service_range nameseq; + struct { +- struct tipc_name name; ++ struct tipc_service_addr name; + __u32 domain; + } name; + } addr; +@@ -231,26 +188,103 @@ struct sockaddr_tipc { + #define TIPC_SOCK_RECVQ_DEPTH 132 /* Default: none (read only) */ + #define TIPC_MCAST_BROADCAST 133 /* Default: TIPC selects. No arg */ + #define TIPC_MCAST_REPLICAST 134 /* Default: TIPC selects. 
No arg */ ++#define TIPC_GROUP_JOIN 135 /* Takes struct tipc_group_req* */ ++#define TIPC_GROUP_LEAVE 136 /* No argument */ ++ ++/* ++ * Flag values ++ */ ++#define TIPC_GROUP_LOOPBACK 0x1 /* Receive copy of sent msg when match */ ++#define TIPC_GROUP_MEMBER_EVTS 0x2 /* Receive membership events in socket */ ++ ++struct tipc_group_req { ++ __u32 type; /* group id */ ++ __u32 instance; /* member id */ ++ __u32 scope; /* cluster/node */ ++ __u32 flags; ++}; + + /* + * Maximum sizes of TIPC bearer-related names (including terminating NULL) + * The string formatting for each name element is: + * media: media + * interface: media:interface name +- * link: Z.C.N:interface-Z.C.N:interface +- * ++ * link: node:interface-node:interface + */ +- ++#define TIPC_NODEID_LEN 16 + #define TIPC_MAX_MEDIA_NAME 16 + #define TIPC_MAX_IF_NAME 16 + #define TIPC_MAX_BEARER_NAME 32 +-#define TIPC_MAX_LINK_NAME 60 ++#define TIPC_MAX_LINK_NAME 68 + +-#define SIOCGETLINKNAME SIOCPROTOPRIVATE ++#define SIOCGETLINKNAME SIOCPROTOPRIVATE ++#define SIOCGETNODEID (SIOCPROTOPRIVATE + 1) + + struct tipc_sioc_ln_req { + __u32 peer; + __u32 bearer_id; + char linkname[TIPC_MAX_LINK_NAME]; + }; ++ ++struct tipc_sioc_nodeid_req { ++ __u32 peer; ++ char node_id[TIPC_NODEID_LEN]; ++}; ++ ++/* The macros and functions below are deprecated: ++ */ ++ ++#define TIPC_CFG_SRV 0 ++#define TIPC_ZONE_SCOPE 1 ++ ++#define TIPC_ADDR_NAMESEQ 1 ++#define TIPC_ADDR_NAME 2 ++#define TIPC_ADDR_ID 3 ++ ++#define TIPC_NODE_BITS 12 ++#define TIPC_CLUSTER_BITS 12 ++#define TIPC_ZONE_BITS 8 ++ ++#define TIPC_NODE_OFFSET 0 ++#define TIPC_CLUSTER_OFFSET TIPC_NODE_BITS ++#define TIPC_ZONE_OFFSET (TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS) ++ ++#define TIPC_NODE_SIZE ((1UL << TIPC_NODE_BITS) - 1) ++#define TIPC_CLUSTER_SIZE ((1UL << TIPC_CLUSTER_BITS) - 1) ++#define TIPC_ZONE_SIZE ((1UL << TIPC_ZONE_BITS) - 1) ++ ++#define TIPC_NODE_MASK (TIPC_NODE_SIZE << TIPC_NODE_OFFSET) ++#define TIPC_CLUSTER_MASK (TIPC_CLUSTER_SIZE << 
TIPC_CLUSTER_OFFSET) ++#define TIPC_ZONE_MASK (TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET) ++ ++#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK) ++ ++#define tipc_portid tipc_socket_addr ++#define tipc_name tipc_service_addr ++#define tipc_name_seq tipc_service_range ++ ++static __inline__ __u32 tipc_addr(unsigned int zone, ++ unsigned int cluster, ++ unsigned int node) ++{ ++ return (zone << TIPC_ZONE_OFFSET) | ++ (cluster << TIPC_CLUSTER_OFFSET) | ++ node; ++} ++ ++static __inline__ unsigned int tipc_zone(__u32 addr) ++{ ++ return addr >> TIPC_ZONE_OFFSET; ++} ++ ++static __inline__ unsigned int tipc_cluster(__u32 addr) ++{ ++ return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET; ++} ++ ++static __inline__ unsigned int tipc_node(__u32 addr) ++{ ++ return addr & TIPC_NODE_MASK; ++} ++ + #endif +diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h +index f9edd20..0ebe02e 100644 +--- a/include/uapi/linux/tipc_netlink.h ++++ b/include/uapi/linux/tipc_netlink.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ + /* + * Copyright (c) 2014, Ericsson AB + * All rights reserved. 
+@@ -113,6 +114,14 @@ enum { + TIPC_NLA_SOCK_REF, /* u32 */ + TIPC_NLA_SOCK_CON, /* nest */ + TIPC_NLA_SOCK_HAS_PUBL, /* flag */ ++ TIPC_NLA_SOCK_STAT, /* nest */ ++ TIPC_NLA_SOCK_TYPE, /* u32 */ ++ TIPC_NLA_SOCK_INO, /* u32 */ ++ TIPC_NLA_SOCK_UID, /* u32 */ ++ TIPC_NLA_SOCK_TIPC_STATE, /* u32 */ ++ TIPC_NLA_SOCK_COOKIE, /* u64 */ ++ TIPC_NLA_SOCK_PAD, /* flag */ ++ TIPC_NLA_SOCK_GROUP, /* nest */ + + __TIPC_NLA_SOCK_MAX, + TIPC_NLA_SOCK_MAX = __TIPC_NLA_SOCK_MAX - 1 +@@ -161,6 +170,8 @@ enum { + TIPC_NLA_NET_UNSPEC, + TIPC_NLA_NET_ID, /* u32 */ + TIPC_NLA_NET_ADDR, /* u32 */ ++ TIPC_NLA_NET_NODEID, /* u64 */ ++ TIPC_NLA_NET_NODEID_W1, /* u64 */ + + __TIPC_NLA_NET_MAX, + TIPC_NLA_NET_MAX = __TIPC_NLA_NET_MAX - 1 +@@ -223,6 +234,19 @@ enum { + TIPC_NLA_MON_PEER_MAX = __TIPC_NLA_MON_PEER_MAX - 1 + }; + ++/* Nest, socket group info */ ++enum { ++ TIPC_NLA_SOCK_GROUP_ID, /* u32 */ ++ TIPC_NLA_SOCK_GROUP_OPEN, /* flag */ ++ TIPC_NLA_SOCK_GROUP_NODE_SCOPE, /* flag */ ++ TIPC_NLA_SOCK_GROUP_CLUSTER_SCOPE, /* flag */ ++ TIPC_NLA_SOCK_GROUP_INSTANCE, /* u32 */ ++ TIPC_NLA_SOCK_GROUP_BC_SEND_NEXT, /* u32 */ ++ ++ __TIPC_NLA_SOCK_GROUP_MAX, ++ TIPC_NLA_SOCK_GROUP_MAX = __TIPC_NLA_SOCK_GROUP_MAX - 1 ++}; ++ + /* Nest, connection info */ + enum { + TIPC_NLA_CON_UNSPEC, +@@ -237,6 +261,18 @@ enum { + TIPC_NLA_CON_MAX = __TIPC_NLA_CON_MAX - 1 + }; + ++/* Nest, socket statistics info */ ++enum { ++ TIPC_NLA_SOCK_STAT_RCVQ, /* u32 */ ++ TIPC_NLA_SOCK_STAT_SENDQ, /* u32 */ ++ TIPC_NLA_SOCK_STAT_LINK_CONG, /* flag */ ++ TIPC_NLA_SOCK_STAT_CONN_CONG, /* flag */ ++ TIPC_NLA_SOCK_STAT_DROP, /* u32 */ ++ ++ __TIPC_NLA_SOCK_STAT_MAX, ++ TIPC_NLA_SOCK_STAT_MAX = __TIPC_NLA_SOCK_STAT_MAX - 1 ++}; ++ + /* Nest, link propreties. 
Valid for link, media and bearer */ + enum { + TIPC_NLA_PROP_UNSPEC, +@@ -244,6 +280,7 @@ enum { + TIPC_NLA_PROP_PRIO, /* u32 */ + TIPC_NLA_PROP_TOL, /* u32 */ + TIPC_NLA_PROP_WIN, /* u32 */ ++ TIPC_NLA_PROP_MTU, /* u32 */ + + __TIPC_NLA_PROP_MAX, + TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1 +diff --git a/include/uapi/linux/tipc_sockets_diag.h b/include/uapi/linux/tipc_sockets_diag.h +new file mode 100644 +index 0000000..21b766e +--- /dev/null ++++ b/include/uapi/linux/tipc_sockets_diag.h +@@ -0,0 +1,17 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* AF_TIPC sock_diag interface for querying open sockets */ ++ ++#ifndef __TIPC_SOCKETS_DIAG_H__ ++#define __TIPC_SOCKETS_DIAG_H__ ++ ++#include ++#include ++ ++/* Request */ ++struct tipc_sock_diag_req { ++ __u8 sdiag_family; /* must be AF_TIPC */ ++ __u8 sdiag_protocol; /* must be 0 */ ++ __u16 pad; /* must be 0 */ ++ __u32 tidiag_states; /* query*/ ++}; ++#endif /* __TIPC_SOCKETS_DIAG_H__ */ +diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h +index c640657..999cb0f 100644 +--- a/include/uapi/linux/types.h ++++ b/include/uapi/linux/types.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _LINUX_TYPES_H + #define _LINUX_TYPES_H + +@@ -43,5 +44,7 @@ typedef __u32 __bitwise __wsum; + #define __aligned_be64 __be64 __attribute__((aligned(8))) + #define __aligned_le64 __le64 __attribute__((aligned(8))) + ++typedef unsigned __bitwise __poll_t; ++ + #endif /* __ASSEMBLY__ */ + #endif /* _LINUX_TYPES_H */ +diff --git a/include/uapi/linux/unix_diag.h b/include/uapi/linux/unix_diag.h +index 1eb0b8d..5c502fd 100644 +--- a/include/uapi/linux/unix_diag.h ++++ b/include/uapi/linux/unix_diag.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __UNIX_DIAG_H__ + #define __UNIX_DIAG_H__ + +diff --git a/include/uapi/linux/veth.h b/include/uapi/linux/veth.h +index 3354c1e..52b58e5 100644 +--- 
a/include/uapi/linux/veth.h ++++ b/include/uapi/linux/veth.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __NET_VETH_H_ + #define __NET_VETH_H_ + +diff --git a/include/uapi/linux/vm_sockets_diag.h b/include/uapi/linux/vm_sockets_diag.h +index a732a6f..6da42f9 100644 +--- a/include/uapi/linux/vm_sockets_diag.h ++++ b/include/uapi/linux/vm_sockets_diag.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* AF_VSOCK sock_diag(7) interface for querying open sockets */ + + #ifndef __VM_SOCKETS_DIAG_H__ +diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h +index 5790293..93fb192 100644 +--- a/include/uapi/linux/xfrm.h ++++ b/include/uapi/linux/xfrm.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _LINUX_XFRM_H + #define _LINUX_XFRM_H + +-- +1.8.3.1 + diff --git a/SOURCES/0048-tc-flower-Add-match-on-encapsulating-tos-ttl.patch b/SOURCES/0048-tc-flower-Add-match-on-encapsulating-tos-ttl.patch new file mode 100644 index 0000000..7c608ff --- /dev/null +++ b/SOURCES/0048-tc-flower-Add-match-on-encapsulating-tos-ttl.patch @@ -0,0 +1,129 @@ +From 738c49477eb843b37cb799115e5b562303bfcd9e Mon Sep 17 00:00:00 2001 +From: Phil Sutter +Date: Wed, 6 Feb 2019 14:51:12 +0100 +Subject: [PATCH] tc/flower: Add match on encapsulating tos/ttl + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1641909 +Upstream Status: iproute2.git commit 761ec9e29ff86 +Conflicts: Adjusted code to missing commit e28b88a464c49 + ("tc: jsonify flower filter"). + +commit 761ec9e29ff867452057f59dc6ca430688b409ea +Author: Or Gerlitz +Date: Thu Jul 19 14:02:15 2018 +0300 + + tc/flower: Add match on encapsulating tos/ttl + + Add matching on tos/ttl of the IP tunnel headers. 
+ + For example, here's decap rule that matches on the tunnel tos: + + tc filter add dev vxlan_sys_4789 protocol ip parent ffff: prio 10 flower \ + enc_src_ip 192.168.10.2 enc_dst_ip 192.168.10.1 enc_key_id 100 enc_dst_port 4789 enc_tos 0x30 \ + src_mac e4:11:22:33:44:70 dst_mac e4:11:22:33:44:50 \ + action tunnel_key unset \ + action mirred egress redirect dev eth0_0 + + Signed-off-by: Or Gerlitz + Reviewed-by: Roi Dayan + Acked-by: Jiri Pirko + Signed-off-by: David Ahern +--- + man/man8/tc-flower.8 | 14 +++++++++++++- + tc/f_flower.c | 27 +++++++++++++++++++++++++++ + 2 files changed, 40 insertions(+), 1 deletion(-) + +diff --git a/man/man8/tc-flower.8 b/man/man8/tc-flower.8 +index be46f02..af19708 100644 +--- a/man/man8/tc-flower.8 ++++ b/man/man8/tc-flower.8 +@@ -57,6 +57,10 @@ flower \- flow based traffic control filter + .IR ipv4_address " | " ipv6_address " } | " + .B enc_dst_port + .IR port_number " | " ++.B enc_tos ++.IR TOS " | " ++.B enc_ttl ++.IR TTL " | " + .BR ip_flags + .IR IP_FLAGS + .SH DESCRIPTION +@@ -207,6 +211,10 @@ bits is assumed. + .BI enc_src_ip " PREFIX" + .TQ + .BI enc_dst_port " NUMBER" ++.TQ ++.BI enc_tos " NUMBER" ++.TQ ++.BI enc_ttl " NUMBER" + Match on IP tunnel metadata. Key id + .I NUMBER + is a 32 bit tunnel key id (e.g. VNI for VXLAN tunnel). +@@ -215,7 +223,11 @@ must be a valid IPv4 or IPv6 address optionally followed by a slash and the + prefix length. If the prefix is missing, \fBtc\fR assumes a full-length + host match. Dst port + .I NUMBER +-is a 16 bit UDP dst port. ++is a 16 bit UDP dst port. Tos ++.I NUMBER ++is an 8 bit tos (dscp+ecn) value, ttl ++.I NUMBER ++is an 8 bit time-to-live value. 
+ .TP + .BI ip_flags " IP_FLAGS" + .I IP_FLAGS +diff --git a/tc/f_flower.c b/tc/f_flower.c +index 5be693a..5f5236c 100644 +--- a/tc/f_flower.c ++++ b/tc/f_flower.c +@@ -70,6 +70,8 @@ static void explain(void) + " enc_dst_ip [ IPV4-ADDR | IPV6-ADDR ] |\n" + " enc_src_ip [ IPV4-ADDR | IPV6-ADDR ] |\n" + " enc_key_id [ KEY-ID ] |\n" ++ " enc_tos MASKED-IP_TOS |\n" ++ " enc_ttl MASKED-IP_TTL |\n" + " ip_flags IP-FLAGS | \n" + " enc_dst_port [ port_number ] }\n" + " FILTERID := X:Y:Z\n" +@@ -883,6 +885,26 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, + fprintf(stderr, "Illegal \"enc_dst_port\"\n"); + return -1; + } ++ } else if (matches(*argv, "enc_tos") == 0) { ++ NEXT_ARG(); ++ ret = flower_parse_ip_tos_ttl(*argv, ++ TCA_FLOWER_KEY_ENC_IP_TOS, ++ TCA_FLOWER_KEY_ENC_IP_TOS_MASK, ++ n); ++ if (ret < 0) { ++ fprintf(stderr, "Illegal \"enc_tos\"\n"); ++ return -1; ++ } ++ } else if (matches(*argv, "enc_ttl") == 0) { ++ NEXT_ARG(); ++ ret = flower_parse_ip_tos_ttl(*argv, ++ TCA_FLOWER_KEY_ENC_IP_TTL, ++ TCA_FLOWER_KEY_ENC_IP_TTL_MASK, ++ n); ++ if (ret < 0) { ++ fprintf(stderr, "Illegal \"enc_ttl\"\n"); ++ return -1; ++ } + } else if (matches(*argv, "action") == 0) { + NEXT_ARG(); + ret = parse_action(&argc, &argv, TCA_FLOWER_ACT, n); +@@ -1296,6 +1318,11 @@ static int flower_print_opt(struct filter_util *qu, FILE *f, + flower_print_port(f, "enc_dst_port", + tb[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]); + ++ flower_print_ip_attr(f, "enc_tos", tb[TCA_FLOWER_KEY_ENC_IP_TOS], ++ tb[TCA_FLOWER_KEY_ENC_IP_TOS_MASK]); ++ flower_print_ip_attr(f, "enc_ttl", tb[TCA_FLOWER_KEY_ENC_IP_TTL], ++ tb[TCA_FLOWER_KEY_ENC_IP_TTL_MASK]); ++ + flower_print_matching_flags(f, "ip_flags", + FLOWER_IP_FLAGS, + tb[TCA_FLOWER_KEY_FLAGS], +-- +1.8.3.1 + diff --git a/SOURCES/0049-tc-act_tunnel_key-Enable-setup-of-tos-and-ttl.patch b/SOURCES/0049-tc-act_tunnel_key-Enable-setup-of-tos-and-ttl.patch new file mode 100644 index 0000000..c3a3eca --- /dev/null +++ 
b/SOURCES/0049-tc-act_tunnel_key-Enable-setup-of-tos-and-ttl.patch @@ -0,0 +1,148 @@ +From 7521695ca299ceb723dc6b17f304b91300b3b16c Mon Sep 17 00:00:00 2001 +From: Phil Sutter +Date: Wed, 6 Feb 2019 14:51:57 +0100 +Subject: [PATCH] tc/act_tunnel_key: Enable setup of tos and ttl + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1641909 +Upstream Status: iproute2.git commit 9f89b0cc0eda2 +Conflicts: +* Context change due to missing commits + 59eb271d1d259 ("tc: m_tunnel_key: add csum/nocsum option") and + 6217917a38268 ("tc: m_tunnel_key: Add tunnel option support to act_tunnel_key"). +* Adjusted tunnel_key_print_tos_ttl() to missing commit 8feb516bfcdd9 + ("tc: jsonify tunnel_key action"). + +commit 9f89b0cc0eda2ef52d8850b0610f3e2e09fd7c1c +Author: Or Gerlitz +Date: Thu Jul 19 14:02:14 2018 +0300 + + tc/act_tunnel_key: Enable setup of tos and ttl + + Allow to set tos and ttl for the tunnel. + + For example, here's encap rule that sets tos to the tunnel: + + tc filter add dev eth0_0 protocol ip parent ffff: prio 10 flower \ + src_mac e4:11:22:33:44:50 dst_mac e4:11:22:33:44:70 \ + action tunnel_key set src_ip 192.168.10.1 dst_ip 192.168.10.2 id 100 dst_port 4789 tos 0x30 \ + action mirred egress redirect dev vxlan_sys_4789 + + Signed-off-by: Or Gerlitz + Reviewed-by: Roi Dayan + Acked-by: Jiri Pirko + Signed-off-by: David Ahern +--- + man/man8/tc-tunnel_key.8 | 8 ++++++++ + tc/m_tunnel_key.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 57 insertions(+) + +diff --git a/man/man8/tc-tunnel_key.8 b/man/man8/tc-tunnel_key.8 +index 52fa585..5e93c59 100644 +--- a/man/man8/tc-tunnel_key.8 ++++ b/man/man8/tc-tunnel_key.8 +@@ -16,6 +16,8 @@ tunnel_key - Tunnel metadata manipulation + .IR ADDRESS + .BI id " KEY_ID" + .BI dst_port " UDP_PORT" ++.BI tos " TOS" ++.BI ttl " TTL" + + .SH DESCRIPTION + The +@@ -77,6 +79,12 @@ Outer header destination IP address (IPv4 or IPv6) + .TP + .B dst_port + Outer header destination UDP port ++.TP ++.B tos 
++Outer header TOS ++.TP ++.B ttl ++Outer header TTL + .RE + .SH EXAMPLES + The following example encapsulates incoming ICMP packets on eth0 into a vxlan +diff --git a/tc/m_tunnel_key.c b/tc/m_tunnel_key.c +index acbcfc1..60fd1c4 100644 +--- a/tc/m_tunnel_key.c ++++ b/tc/m_tunnel_key.c +@@ -80,6 +80,22 @@ static int tunnel_key_parse_dst_port(char *str, int type, struct nlmsghdr *n) + return 0; + } + ++static int tunnel_key_parse_tos_ttl(char *str, int type, struct nlmsghdr *n) ++{ ++ int ret; ++ __u8 val; ++ ++ ret = get_u8(&val, str, 10); ++ if (ret) ++ ret = get_u8(&val, str, 16); ++ if (ret) ++ return -1; ++ ++ addattr8(n, MAX_MSG, type, val); ++ ++ return 0; ++} ++ + static int parse_tunnel_key(struct action_util *a, int *argc_p, char ***argv_p, + int tca_id, struct nlmsghdr *n) + { +@@ -154,6 +170,22 @@ static int parse_tunnel_key(struct action_util *a, int *argc_p, char ***argv_p, + fprintf(stderr, "Illegal \"dst port\"\n"); + return -1; + } ++ } else if (matches(*argv, "tos") == 0) { ++ NEXT_ARG(); ++ ret = tunnel_key_parse_tos_ttl(*argv, ++ TCA_TUNNEL_KEY_ENC_TOS, n); ++ if (ret < 0) { ++ fprintf(stderr, "Illegal \"tos\"\n"); ++ return -1; ++ } ++ } else if (matches(*argv, "ttl") == 0) { ++ NEXT_ARG(); ++ ret = tunnel_key_parse_tos_ttl(*argv, ++ TCA_TUNNEL_KEY_ENC_TTL, n); ++ if (ret < 0) { ++ fprintf(stderr, "Illegal \"ttl\"\n"); ++ return -1; ++ } + } else if (matches(*argv, "help") == 0) { + usage(); + } else { +@@ -231,6 +263,19 @@ static void tunnel_key_print_dst_port(FILE *f, char *name, + fprintf(f, "\n\t%s %d", name, rta_getattr_be16(attr)); + } + ++static void tunnel_key_print_tos_ttl(FILE *f, char *name, ++ struct rtattr *attr) ++{ ++ if (!attr) ++ return; ++ ++ if (matches(name, "tos") == 0 && rta_getattr_u8(attr) != 0) { ++ fprintf(f, "\n\t%s 0x%x", name, rta_getattr_u8(attr)); ++ } else if (matches(name, "ttl") == 0 && rta_getattr_u8(attr) != 0) { ++ fprintf(f, "\n\t%s %u", name, rta_getattr_u8(attr)); ++ } ++} ++ + static int 
print_tunnel_key(struct action_util *au, FILE *f, struct rtattr *arg) + { + struct rtattr *tb[TCA_TUNNEL_KEY_MAX + 1]; +@@ -267,6 +312,10 @@ static int print_tunnel_key(struct action_util *au, FILE *f, struct rtattr *arg) + tb[TCA_TUNNEL_KEY_ENC_KEY_ID]); + tunnel_key_print_dst_port(f, "dst_port", + tb[TCA_TUNNEL_KEY_ENC_DST_PORT]); ++ tunnel_key_print_tos_ttl(f, "tos", ++ tb[TCA_TUNNEL_KEY_ENC_TOS]); ++ tunnel_key_print_tos_ttl(f, "ttl", ++ tb[TCA_TUNNEL_KEY_ENC_TTL]); + break; + } + fprintf(f, " %s", action_n2a(parm->action)); +-- +1.8.3.1 + diff --git a/SOURCES/0055-ip-Add-violation-counters-to-VF-statisctics.patch b/SOURCES/0055-ip-Add-violation-counters-to-VF-statisctics.patch new file mode 100644 index 0000000..72fd54f --- /dev/null +++ b/SOURCES/0055-ip-Add-violation-counters-to-VF-statisctics.patch @@ -0,0 +1,78 @@ +From 1e22b512374d25b547212bdbe1530ac8de1defdf Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 18 Mar 2019 11:23:40 +0100 +Subject: [PATCH] ip: Add violation counters to VF statisctics + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1471680 +Upstream Status: unknown commit 8c7acf3a +Conflicts: manually applied due to JSON support + +commit 8c7acf3a7ac265badc287f064614d60119a8072d +Author: Eran Ben Elisha +Date: Sun Jul 22 13:31:12 2018 +0300 + + ip: Add violation counters to VF statisctics + + Extend VFs statistics by receive and transmit violation counters. 
+ + Example: "ip -s link show dev enp5s0f0" + + 6: enp5s0f0: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 + link/ether 24:8a:07:a5:28:f0 brd ff:ff:ff:ff:ff:ff + RX: bytes packets errors dropped overrun mcast + 0 0 0 0 0 2 + TX: bytes packets errors dropped carrier collsns + 1406 17 0 0 0 0 + vf 0 MAC 00:00:ca:fe:ca:fe, vlan 5, spoof checking off, link-state auto, trust off, query_rss off + RX: bytes packets mcast bcast dropped + 1666 29 14 32 0 + TX: bytes packets dropped + 2880 44 2412 + + Signed-off-by: Eran Ben Elisha + Signed-off-by: David Ahern +--- + ip/ipaddress.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/ip/ipaddress.c b/ip/ipaddress.c +index 14e9e22..44111a2 100644 +--- a/ip/ipaddress.c ++++ b/ip/ipaddress.c +@@ -471,21 +471,31 @@ static void print_vf_stats64(FILE *fp, struct rtattr *vfstats) + + /* RX stats */ + fprintf(fp, "%s", _SL_); +- fprintf(fp, " RX: bytes packets mcast bcast %s", _SL_); ++ fprintf(fp, " RX: bytes packets mcast bcast "); ++ if (vf[IFLA_VF_STATS_RX_DROPPED]) ++ fprintf(fp, " dropped "); ++ fprintf(fp, "%s", _SL_); + fprintf(fp, " "); + + print_num(fp, 10, rta_getattr_u64(vf[IFLA_VF_STATS_RX_BYTES])); + print_num(fp, 8, rta_getattr_u64(vf[IFLA_VF_STATS_RX_PACKETS])); + print_num(fp, 7, rta_getattr_u64(vf[IFLA_VF_STATS_MULTICAST])); + print_num(fp, 7, rta_getattr_u64(vf[IFLA_VF_STATS_BROADCAST])); ++ if (vf[IFLA_VF_STATS_RX_DROPPED]) ++ print_num(fp, 8, rta_getattr_u64(vf[IFLA_VF_STATS_RX_DROPPED])); + + /* TX stats */ + fprintf(fp, "%s", _SL_); +- fprintf(fp, " TX: bytes packets %s", _SL_); ++ fprintf(fp, " TX: bytes packets "); ++ if (vf[IFLA_VF_STATS_TX_DROPPED]) ++ fprintf(fp, " dropped "); ++ fprintf(fp, "%s", _SL_); + fprintf(fp, " "); + + print_num(fp, 10, rta_getattr_u64(vf[IFLA_VF_STATS_TX_BYTES])); + print_num(fp, 8, rta_getattr_u64(vf[IFLA_VF_STATS_TX_PACKETS])); ++ if (vf[IFLA_VF_STATS_TX_DROPPED]) ++ print_num(fp, 8, 
rta_getattr_u64(vf[IFLA_VF_STATS_TX_DROPPED])); + } + + static void print_link_stats64(FILE *fp, const struct rtnl_link_stats64 *s, +-- +2.20.1 + diff --git a/SOURCES/0056-devlink-Add-support-for-devlink-resource-abstraction.patch b/SOURCES/0056-devlink-Add-support-for-devlink-resource-abstraction.patch new file mode 100644 index 0000000..7f3c3c4 --- /dev/null +++ b/SOURCES/0056-devlink-Add-support-for-devlink-resource-abstraction.patch @@ -0,0 +1,633 @@ +From 5190aa430d198420679e53163604f7b6860bcd0f Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 25 Mar 2019 11:40:10 +0100 +Subject: [PATCH] devlink: Add support for devlink resource abstraction + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1644731 +Upstream Status: iproute2.git commit 8cd644095842a +Conflicts: adjusted help printout due to missing commit + 3e897912cbff9 ("devlink: add batch command support") + +commit 8cd644095842af3107320e86eeb01be6af6c77bb +Author: Arkadi Sharshevsky +Date: Wed Feb 14 10:55:18 2018 +0200 + + devlink: Add support for devlink resource abstraction + + Add support for devlink resource abstraction. The resources are + represented by a tree based structure and are identified by a name and + a size. Some resources can present their real time occupancy. + + First the resources exposed by the driver can be observed, for example: + + $devlink resource show pci/0000:03:00.0 + pci/0000:03:00.0: + name kvd size 245760 unit entry + resources: + name linear size 98304 occ 0 unit entry size_min 0 size_max 147456 size_gran 128 + name hash_double size 60416 unit entry size_min 32768 size_max 180224 size_gran 128 + name hash_single size 87040 unit entry size_min 65536 size_max 212992 size_gran 128 + + Some resource's size can be changed. 
Examples: + + $devlink resource set pci/0000:03:00.0 path /kvd/hash_single size 73088 + $devlink resource set pci/0000:03:00.0 path /kvd/hash_double size 74368 + + The changes do not apply immediately, this can be validate by the 'size_new' + attribute, which represents the pending changed size. For example + + $devlink resource show pci/0000:03:00.0 + pci/0000:03:00.0: + name kvd size 245760 unit entry size_valid false + resources: + name linear size 98304 size_new 147456 occ 0 unit entry size_min 0 size_max 147456 size_gran 128 + name hash_double size 60416 unit entry size_min 32768 size_max 180224 size_gran 128 + name hash_single size 87040 unit entry size_min 65536 size_max 212992 size_gran 128 + + In case of a pending change the nested resources present an indication + for a valid configuration of its children (sum of its children sizes + doesn't exceed the parent's size). + + In order for the changes to take place hot reload is needed. The hot + reload through devlink will be introduced in the following patch. 
+ + Signed-off-by: Arkadi Sharshevsky + Acked-by: Jiri Pirko + Signed-off-by: Stephen Hemminger +--- + devlink/devlink.c | 490 +++++++++++++++++++++++++++++++++++++++++++++- + include/list.h | 5 + + 2 files changed, 494 insertions(+), 1 deletion(-) + +diff --git a/devlink/devlink.c b/devlink/devlink.c +index f9bc16c350c40..7f47b79450094 100644 +--- a/devlink/devlink.c ++++ b/devlink/devlink.c +@@ -177,6 +177,8 @@ static void ifname_map_free(struct ifname_map *ifname_map) + #define DL_OPT_DPIPE_TABLE_NAME BIT(13) + #define DL_OPT_DPIPE_TABLE_COUNTERS BIT(14) + #define DL_OPT_ESWITCH_ENCAP_MODE BIT(15) ++#define DL_OPT_RESOURCE_PATH BIT(16) ++#define DL_OPT_RESOURCE_SIZE BIT(17) + + struct dl_opts { + uint32_t present; /* flags of present items */ +@@ -197,6 +199,10 @@ struct dl_opts { + const char *dpipe_table_name; + bool dpipe_counters_enable; + bool eswitch_encap_mode; ++ const char *resource_path; ++ uint32_t resource_size; ++ uint32_t resource_id; ++ bool resource_id_valid; + }; + + struct dl { +@@ -937,6 +943,20 @@ static int dl_argv_parse(struct dl *dl, uint32_t o_required, + if (err) + return err; + o_found |= DL_OPT_ESWITCH_ENCAP_MODE; ++ } else if (dl_argv_match(dl, "path") && ++ (o_all & DL_OPT_RESOURCE_PATH)) { ++ dl_arg_inc(dl); ++ err = dl_argv_str(dl, &opts->resource_path); ++ if (err) ++ return err; ++ o_found |= DL_OPT_RESOURCE_PATH; ++ } else if (dl_argv_match(dl, "size") && ++ (o_all & DL_OPT_RESOURCE_SIZE)) { ++ dl_arg_inc(dl); ++ err = dl_argv_uint32_t(dl, &opts->resource_size); ++ if (err) ++ return err; ++ o_found |= DL_OPT_RESOURCE_SIZE; + } else { + pr_err("Unknown option \"%s\"\n", dl_argv(dl)); + return -EINVAL; +@@ -1079,6 +1099,12 @@ static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl) + if (opts->present & DL_OPT_ESWITCH_ENCAP_MODE) + mnl_attr_put_u8(nlh, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, + opts->eswitch_encap_mode); ++ if ((opts->present & DL_OPT_RESOURCE_PATH) && opts->resource_id_valid) ++ mnl_attr_put_u64(nlh, 
DEVLINK_ATTR_RESOURCE_ID, ++ opts->resource_id); ++ if (opts->present & DL_OPT_RESOURCE_SIZE) ++ mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_SIZE, ++ opts->resource_size); + } + + static int dl_argv_parse_put(struct nlmsghdr *nlh, struct dl *dl, +@@ -2666,6 +2692,91 @@ struct dpipe_header { + unsigned int fields_count; + }; + ++struct resource { ++ char *name; ++ uint64_t size; ++ uint64_t size_new; ++ uint64_t size_min; ++ uint64_t size_max; ++ uint64_t size_gran; ++ enum devlink_resource_unit unit; ++ bool size_valid; ++ uint64_t size_occ; ++ bool occ_valid; ++ uint64_t id; ++ struct list_head list; ++ struct list_head resource_list; ++ struct resource *parent; ++}; ++ ++struct resources { ++ struct list_head resource_list; ++}; ++ ++struct resource_ctx { ++ struct dl *dl; ++ int err; ++ struct resources *resources; ++ bool print_resources; ++ bool pending_change; ++}; ++ ++static struct resource *resource_alloc(void) ++{ ++ struct resource *resource; ++ ++ resource = calloc(1, sizeof(struct resource)); ++ if (!resource) ++ return NULL; ++ INIT_LIST_HEAD(&resource->resource_list); ++ return resource; ++} ++ ++static void resource_free(struct resource *resource) ++{ ++ struct resource *child_resource, *tmp; ++ ++ list_for_each_entry_safe(child_resource, tmp, &resource->resource_list, ++ list) { ++ free(child_resource->name); ++ resource_free(child_resource); ++ } ++ free(resource); ++} ++ ++static struct resources *resources_alloc(void) ++{ ++ struct resources *resources; ++ ++ resources = calloc(1, sizeof(struct resources)); ++ if (!resources) ++ return NULL; ++ INIT_LIST_HEAD(&resources->resource_list); ++ return resources; ++} ++ ++static void resources_free(struct resources *resources) ++{ ++ struct resource *resource, *tmp; ++ ++ list_for_each_entry_safe(resource, tmp, &resources->resource_list, list) ++ resource_free(resource); ++} ++ ++static int resource_ctx_init(struct resource_ctx *ctx, struct dl *dl) ++{ ++ ctx->resources = resources_alloc(); ++ if 
(!ctx->resources) ++ return -ENOMEM; ++ ctx->dl = dl; ++ return 0; ++} ++ ++static void resource_ctx_fini(struct resource_ctx *ctx) ++{ ++ resources_free(ctx->resources); ++} ++ + struct dpipe_ctx { + struct dl *dl; + int err; +@@ -3203,6 +3314,66 @@ err_match_show: + return -EINVAL; + } + ++static struct resource * ++resource_find(struct resources *resources, struct resource *resource, ++ uint64_t resource_id) ++{ ++ struct list_head *list_head; ++ ++ if (!resource) ++ list_head = &resources->resource_list; ++ else ++ list_head = &resource->resource_list; ++ ++ list_for_each_entry(resource, list_head, list) { ++ struct resource *child_resource; ++ ++ if (resource->id == resource_id) ++ return resource; ++ ++ child_resource = resource_find(resources, resource, ++ resource_id); ++ if (child_resource) ++ return child_resource; ++ } ++ return NULL; ++} ++ ++static void ++resource_path_print(struct dl *dl, struct resources *resources, ++ uint64_t resource_id) ++{ ++ struct resource *resource, *parent_resource; ++ const char del[] = "/"; ++ int path_len = 0; ++ char *path; ++ ++ resource = resource_find(resources, NULL, resource_id); ++ if (!resource) ++ return; ++ ++ for (parent_resource = resource; parent_resource; ++ parent_resource = parent_resource->parent) ++ path_len += strlen(parent_resource->name) + 1; ++ ++ path_len++; ++ path = calloc(1, path_len); ++ if (!path) ++ return; ++ ++ path += path_len - 1; ++ for (parent_resource = resource; parent_resource; ++ parent_resource = parent_resource->parent) { ++ path -= strlen(parent_resource->name); ++ memcpy(path, parent_resource->name, ++ strlen(parent_resource->name)); ++ path -= strlen(del); ++ memcpy(path, del, strlen(del)); ++ } ++ pr_out_str(dl, "resource_path", path); ++ free(path); ++} ++ + static int dpipe_table_show(struct dpipe_ctx *ctx, struct nlattr *nl) + { + struct nlattr *nla_table[DEVLINK_ATTR_MAX + 1] = {}; +@@ -3617,10 +3788,324 @@ static int cmd_dpipe(struct dl *dl) + return -ENOENT; + } + 
++static int ++resource_parse(struct resource_ctx *ctx, struct resource *resource, ++ struct nlattr **nla_resource) ++{ ++ if (!nla_resource[DEVLINK_ATTR_RESOURCE_NAME] || ++ !nla_resource[DEVLINK_ATTR_RESOURCE_SIZE] || ++ !nla_resource[DEVLINK_ATTR_RESOURCE_ID] || ++ !nla_resource[DEVLINK_ATTR_RESOURCE_UNIT] || ++ !nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_MIN] || ++ !nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_MAX] || ++ !nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_GRAN]) { ++ return -EINVAL; ++ } ++ ++ resource->name = strdup(mnl_attr_get_str(nla_resource[DEVLINK_ATTR_RESOURCE_NAME])); ++ resource->size = mnl_attr_get_u64(nla_resource[DEVLINK_ATTR_RESOURCE_SIZE]); ++ resource->id = mnl_attr_get_u64(nla_resource[DEVLINK_ATTR_RESOURCE_ID]); ++ resource->unit = mnl_attr_get_u8(nla_resource[DEVLINK_ATTR_RESOURCE_UNIT]); ++ resource->size_min = mnl_attr_get_u64(nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_MIN]); ++ resource->size_max = mnl_attr_get_u64(nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_MAX]); ++ resource->size_gran = mnl_attr_get_u64(nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_GRAN]); ++ ++ if (nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_NEW]) ++ resource->size_new = mnl_attr_get_u64(nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_NEW]); ++ else ++ resource->size_new = resource->size; ++ ++ if (nla_resource[DEVLINK_ATTR_RESOURCE_OCC]) { ++ resource->size_occ = mnl_attr_get_u64(nla_resource[DEVLINK_ATTR_RESOURCE_OCC]); ++ resource->occ_valid = true; ++ } ++ ++ if (resource->size_new != resource->size) ++ ctx->pending_change = true; ++ ++ return 0; ++} ++ ++static int ++resource_get(struct resource_ctx *ctx, struct resource *resource, ++ struct resource *parent_resource, struct nlattr *nl) ++{ ++ struct nlattr *nla_resource[DEVLINK_ATTR_MAX + 1] = {}; ++ struct nlattr *nla_child_resource; ++ struct nlattr *nla_resources; ++ bool top = false; ++ int err; ++ ++ if (!resource) { ++ nla_resources = nl; ++ top = true; ++ goto out; ++ } ++ ++ err = mnl_attr_parse_nested(nl, attr_cb, nla_resource); 
++ if (err != MNL_CB_OK) ++ return -EINVAL; ++ ++ err = resource_parse(ctx, resource, nla_resource); ++ if (err) ++ return err; ++ ++ resource->parent = parent_resource; ++ if (!nla_resource[DEVLINK_ATTR_RESOURCE_LIST]) ++ return 0; ++ ++ resource->size_valid = !!mnl_attr_get_u8(nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_VALID]); ++ nla_resources = nla_resource[DEVLINK_ATTR_RESOURCE_LIST]; ++out: ++ mnl_attr_for_each_nested(nla_child_resource, nla_resources) { ++ struct resource *child_resource; ++ struct list_head *list; ++ ++ child_resource = resource_alloc(); ++ if (!child_resource) ++ return -ENOMEM; ++ ++ if (top) ++ list = &ctx->resources->resource_list; ++ else ++ list = &resource->resource_list; ++ ++ list_add_tail(&child_resource->list, list); ++ err = resource_get(ctx, child_resource, resource, ++ nla_child_resource); ++ if (err) ++ return err; ++ } ++ ++ return 0; ++} ++ ++static const char *resource_unit_str_get(enum devlink_resource_unit unit) ++{ ++ switch (unit) { ++ case DEVLINK_RESOURCE_UNIT_ENTRY: return "entry"; ++ default: return ""; ++ } ++} ++ ++static void resource_show(struct resource *resource, ++ struct resource_ctx *ctx) ++{ ++ struct resource *child_resource; ++ struct dl *dl = ctx->dl; ++ ++ pr_out_str(dl, "name", resource->name); ++ if (dl->verbose) ++ resource_path_print(dl, ctx->resources, resource->id); ++ pr_out_uint(dl, "size", resource->size); ++ if (resource->size != resource->size_new) ++ pr_out_uint(dl, "size_new", resource->size_new); ++ if (resource->occ_valid) ++ pr_out_uint(dl, "occ", resource->size_occ); ++ pr_out_str(dl, "unit", resource_unit_str_get(resource->unit)); ++ ++ if (resource->size_min != resource->size_max) { ++ pr_out_uint(dl, "size_min", resource->size_min); ++ pr_out_uint(dl, "size_max", resource->size_max); ++ pr_out_uint(dl, "size_gran", resource->size_gran); ++ } ++ ++ if (list_empty(&resource->resource_list)) ++ return; ++ ++ if (ctx->pending_change) ++ pr_out_str(dl, "size_valid", resource->size_valid ? 
++ "true" : "false"); ++ pr_out_array_start(dl, "resources"); ++ list_for_each_entry(child_resource, &resource->resource_list, list) { ++ pr_out_entry_start(dl); ++ resource_show(child_resource, ctx); ++ pr_out_entry_end(dl); ++ } ++ pr_out_array_end(dl); ++} ++ ++static void ++resources_show(struct resource_ctx *ctx, struct nlattr **tb) ++{ ++ struct resources *resources = ctx->resources; ++ struct resource *resource; ++ ++ list_for_each_entry(resource, &resources->resource_list, list) { ++ pr_out_handle_start_arr(ctx->dl, tb); ++ resource_show(resource, ctx); ++ pr_out_handle_end(ctx->dl); ++ } ++} ++ ++static int resources_get(struct resource_ctx *ctx, struct nlattr **tb) ++{ ++ return resource_get(ctx, NULL, NULL, tb[DEVLINK_ATTR_RESOURCE_LIST]); ++} ++ ++static int cmd_resource_dump_cb(const struct nlmsghdr *nlh, void *data) ++{ ++ struct resource_ctx *ctx = data; ++ struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; ++ struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); ++ int err; ++ ++ mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); ++ if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || ++ !tb[DEVLINK_ATTR_RESOURCE_LIST]) ++ return MNL_CB_ERROR; ++ ++ err = resources_get(ctx, tb); ++ if (err) { ++ ctx->err = err; ++ return MNL_CB_ERROR; ++ } ++ ++ if (ctx->print_resources) ++ resources_show(ctx, tb); ++ ++ return MNL_CB_OK; ++} ++ ++static int cmd_resource_show(struct dl *dl) ++{ ++ struct nlmsghdr *nlh; ++ struct resource_ctx ctx = {}; ++ int err; ++ ++ nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_RESOURCE_DUMP, ++ NLM_F_REQUEST | NLM_F_ACK); ++ ++ err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE, 0); ++ if (err) ++ return err; ++ ++ err = resource_ctx_init(&ctx, dl); ++ if (err) ++ return err; ++ ++ ctx.print_resources = true; ++ pr_out_section_start(dl, "resources"); ++ err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_resource_dump_cb, &ctx); ++ pr_out_section_end(dl); ++ resource_ctx_fini(&ctx); ++ return err; ++} ++ ++static void 
cmd_resource_help(void) ++{ ++ pr_err("Usage: devlink resource show DEV\n" ++ " devlink resource set DEV path PATH size SIZE\n"); ++} ++ ++static struct resource * ++resource_find_by_name(struct list_head *list, char *name) ++{ ++ struct resource *resource; ++ ++ list_for_each_entry(resource, list, list) { ++ if (!strcmp(resource->name, name)) ++ return resource; ++ } ++ return NULL; ++} ++ ++static int ++resource_path_parse(struct resource_ctx *ctx, const char *resource_path, ++ uint32_t *p_resource_id, bool *p_resource_valid) ++{ ++ struct resource *resource; ++ uint32_t resource_id = 0; ++ char *resource_path_dup; ++ struct list_head *list; ++ const char del[] = "/"; ++ char *resource_name; ++ ++ resource_path_dup = strdup(resource_path); ++ list = &ctx->resources->resource_list; ++ resource_name = strtok(resource_path_dup, del); ++ while (resource_name != NULL) { ++ resource = resource_find_by_name(list, resource_name); ++ if (!resource) ++ goto err_resource_lookup; ++ ++ list = &resource->resource_list; ++ resource_name = strtok(NULL, del); ++ resource_id = resource->id; ++ } ++ free(resource_path_dup); ++ *p_resource_valid = true; ++ *p_resource_id = resource_id; ++ return 0; ++ ++err_resource_lookup: ++ free(resource_path_dup); ++ return -EINVAL; ++} ++ ++static int cmd_resource_set(struct dl *dl) ++{ ++ struct nlmsghdr *nlh; ++ struct resource_ctx ctx = {}; ++ int err; ++ ++ err = resource_ctx_init(&ctx, dl); ++ if (err) ++ return err; ++ ++ ctx.print_resources = false; ++ err = dl_argv_parse(dl, DL_OPT_HANDLE | DL_OPT_RESOURCE_PATH | ++ DL_OPT_RESOURCE_SIZE, 0); ++ if (err) ++ goto out; ++ ++ nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_RESOURCE_DUMP, ++ NLM_F_REQUEST); ++ dl_opts_put(nlh, dl); ++ err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_resource_dump_cb, &ctx); ++ if (err) { ++ pr_err("error getting resources %s\n", strerror(ctx.err)); ++ goto out; ++ } ++ ++ err = resource_path_parse(&ctx, dl->opts.resource_path, ++ &dl->opts.resource_id, ++ 
&dl->opts.resource_id_valid); ++ if (err) { ++ pr_err("error parsing resource path %s\n", strerror(err)); ++ goto out; ++ } ++ ++ nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_RESOURCE_SET, ++ NLM_F_REQUEST | NLM_F_ACK); ++ ++ dl_opts_put(nlh, dl); ++ err = _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); ++out: ++ resource_ctx_fini(&ctx); ++ return err; ++} ++ ++static int cmd_resource(struct dl *dl) ++{ ++ if (dl_argv_match(dl, "help") || dl_no_arg(dl)) { ++ cmd_resource_help(); ++ return 0; ++ } else if (dl_argv_match(dl, "show")) { ++ dl_arg_inc(dl); ++ return cmd_resource_show(dl); ++ } else if (dl_argv_match(dl, "set")) { ++ dl_arg_inc(dl); ++ return cmd_resource_set(dl); ++ } ++ pr_err("Command \"%s\" not found\n", dl_argv(dl)); ++ return -ENOENT; ++} ++ + static void help(void) + { + pr_err("Usage: devlink [ OPTIONS ] OBJECT { COMMAND | help }\n" +- "where OBJECT := { dev | port | sb | monitor | dpipe }\n" ++ "where OBJECT := { dev | port | sb | monitor | dpipe | resource }\n" + " OPTIONS := { -V[ersion] | -n[no-nice-names] | -j[json] | -p[pretty] | -v[verbose] }\n"); + } + +@@ -3644,6 +4129,9 @@ static int dl_cmd(struct dl *dl) + } else if (dl_argv_match(dl, "dpipe")) { + dl_arg_inc(dl); + return cmd_dpipe(dl); ++ } else if (dl_argv_match(dl, "resource")) { ++ dl_arg_inc(dl); ++ return cmd_resource(dl); + } + pr_err("Object \"%s\" not found\n", dl_argv(dl)); + return -ENOENT; +diff --git a/include/list.h b/include/list.h +index 5b529dc6e5211..b2adf55578449 100644 +--- a/include/list.h ++++ b/include/list.h +@@ -107,6 +107,11 @@ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) + n->pprev = &h->first; + } + ++static inline int list_empty(const struct list_head *head) ++{ ++ return head->next == head; ++} ++ + #define hlist_for_each(pos, head) \ + for (pos = (head)->first; pos ; pos = pos->next) + +-- +2.20.1 + diff --git a/SOURCES/0057-devlink-Add-support-for-hot-reload.patch b/SOURCES/0057-devlink-Add-support-for-hot-reload.patch 
new file mode 100644 index 0000000..9c804b6 --- /dev/null +++ b/SOURCES/0057-devlink-Add-support-for-hot-reload.patch @@ -0,0 +1,81 @@ +From ceaa3a5ecffe0c558c990be6c4ba682be5ce85e8 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 25 Mar 2019 11:40:57 +0100 +Subject: [PATCH] devlink: Add support for hot reload + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1644731 +Upstream Status: iproute2.git commit 06dd94f952e50 + +commit 06dd94f952e50edeffe5ea8b7b95b5cd562b9365 +Author: Arkadi Sharshevsky +Date: Wed Feb 14 10:55:19 2018 +0200 + + devlink: Add support for hot reload + + Add support for hot reload. It should be used in order for resource + updates to take place. + + Signed-off-by: Arkadi Sharshevsky + Acked-by: Jiri Pirko + Signed-off-by: Stephen Hemminger +--- + devlink/devlink.c | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/devlink/devlink.c b/devlink/devlink.c +index 7f47b79450094..fc3939e564bc8 100644 +--- a/devlink/devlink.c ++++ b/devlink/devlink.c +@@ -1163,6 +1163,7 @@ static void cmd_dev_help(void) + pr_err(" [ inline-mode { none | link | network | transport } ]\n"); + pr_err(" [ encap { disable | enable } ]\n"); + pr_err(" devlink dev eswitch show DEV\n"); ++ pr_err(" devlink dev reload DEV\n"); + } + + static bool cmp_arr_last_handle(struct dl *dl, const char *bus_name, +@@ -1602,6 +1603,31 @@ static int cmd_dev_show(struct dl *dl) + return err; + } + ++static void cmd_dev_reload_help(void) ++{ ++ pr_err("Usage: devlink dev reload [ DEV ]\n"); ++} ++ ++static int cmd_dev_reload(struct dl *dl) ++{ ++ struct nlmsghdr *nlh; ++ int err; ++ ++ if (dl_argv_match(dl, "help") || dl_no_arg(dl)) { ++ cmd_dev_reload_help(); ++ return 0; ++ } ++ ++ nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_RELOAD, ++ NLM_F_REQUEST | NLM_F_ACK); ++ ++ err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE, 0); ++ if (err) ++ return err; ++ ++ return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); ++} ++ + static int cmd_dev(struct 
dl *dl) + { + if (dl_argv_match(dl, "help")) { +@@ -1614,6 +1640,9 @@ static int cmd_dev(struct dl *dl) + } else if (dl_argv_match(dl, "eswitch")) { + dl_arg_inc(dl); + return cmd_dev_eswitch(dl); ++ } else if (dl_argv_match(dl, "reload")) { ++ dl_arg_inc(dl); ++ return cmd_dev_reload(dl); + } + pr_err("Command \"%s\" not found\n", dl_argv(dl)); + return -ENOENT; +-- +2.20.1 + diff --git a/SOURCES/0058-devlink-Update-man-pages-and-add-resource-man.patch b/SOURCES/0058-devlink-Update-man-pages-and-add-resource-man.patch new file mode 100644 index 0000000..4fe2d73 --- /dev/null +++ b/SOURCES/0058-devlink-Update-man-pages-and-add-resource-man.patch @@ -0,0 +1,165 @@ +From edf1a3765c440bdd6a15ca7dd4d52a2264a67f69 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 25 Mar 2019 11:40:57 +0100 +Subject: [PATCH] devlink: Update man pages and add resource man + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1644731 +Upstream Status: iproute2.git commit 58b48c5d75e29 + +commit 58b48c5d75e2960dfcd947975911a170ae765975 +Author: Arkadi Sharshevsky +Date: Wed Feb 14 10:55:22 2018 +0200 + + devlink: Update man pages and add resource man + + Add resource man, and update dev manual for reload command. 
+ + Signed-off-by: Arkadi Sharshevsky + Acked-by: Jiri Pirko + Signed-off-by: Stephen Hemminger +--- + man/man8/devlink-dev.8 | 15 +++++++ + man/man8/devlink-resource.8 | 78 +++++++++++++++++++++++++++++++++++++ + man/man8/devlink.8 | 1 + + 3 files changed, 94 insertions(+) + create mode 100644 man/man8/devlink-resource.8 + +diff --git a/man/man8/devlink-dev.8 b/man/man8/devlink-dev.8 +index b074d57a19369..7c749ddabaeeb 100644 +--- a/man/man8/devlink-dev.8 ++++ b/man/man8/devlink-dev.8 +@@ -42,6 +42,10 @@ devlink-dev \- devlink device configuration + .BR "devlink dev eswitch show" + .IR DEV + ++.ti -8 ++.BR "devlink dev reload" ++.IR DEV ++ + .SH "DESCRIPTION" + .SS devlink dev show - display devlink device attributes + +@@ -94,6 +98,12 @@ Set eswitch encapsulation support + .I enable + - Enable encapsulation support + ++.SS devlink dev reload - perform hot reload of the driver. ++ ++.PP ++.I "DEV" ++- Specifies the devlink device to reload. ++ + .SH "EXAMPLES" + .PP + devlink dev show +@@ -114,6 +124,11 @@ Shows the eswitch mode of specified devlink device. + devlink dev eswitch set pci/0000:01:00.0 mode switchdev + .RS 4 + Sets the eswitch mode of specified devlink device to switchdev. ++.RE ++.PP ++devlink dev reload pci/0000:01:00.0 ++.RS 4 ++Performs hot reload of specified devlink device. 
+ + .SH SEE ALSO + .BR devlink (8), +diff --git a/man/man8/devlink-resource.8 b/man/man8/devlink-resource.8 +new file mode 100644 +index 0000000000000..b8f788060427b +--- /dev/null ++++ b/man/man8/devlink-resource.8 +@@ -0,0 +1,78 @@ ++.TH DEVLINK\-RESOURCE 8 "11 Feb 2018" "iproute2" "Linux" ++.SH NAME ++devlink-resource \- devlink device resource configuration ++.SH SYNOPSIS ++.sp ++.ad l ++.in +8 ++.ti -8 ++.B devlink ++.RI "[ " OPTIONS " ]" ++.B resource ++.RI " { " COMMAND " | " ++.BR help " }" ++.sp ++ ++.ti -8 ++.IR OPTIONS " := { " ++\fB\-v\fR[\fIerbose\fR] } ++ ++.ti -8 ++.B devlink resource show ++.IR DEV ++ ++.ti -8 ++.B devlink resource help ++ ++.ti -8 ++.BR "devlink resource set" ++.IR DEV ++.BI path " RESOURCE_PATH" ++.BI size " RESOURCE_SIZE" ++ ++.SH "DESCRIPTION" ++.SS devlink resource show - display devlink device's resources ++ ++.PP ++.I "DEV" ++- specifies the devlink device to show. ++ ++.in +4 ++Format is: ++.in +2 ++BUS_NAME/BUS_ADDRESS ++ ++.SS devlink resource set - sets resource size of specific resource ++ ++.PP ++.I "DEV" ++- specifies the devlink device. ++ ++.TP ++.BI path " RESOURCE_PATH" ++Resource's path. ++ ++.TP ++.BI size " RESOURCE_SIZE" ++The new resource's size. ++ ++.SH "EXAMPLES" ++.PP ++devlink resource show pci/0000:01:00.0 ++.RS 4 ++Shows the resources of the specified devlink device. ++.RE ++.PP ++devlink resource set pci/0000:01:00.0 path /kvd/linear size 98304 ++.RS 4 ++Sets the size of the specified resource for the specified devlink device. ++ ++.SH SEE ALSO ++.BR devlink (8), ++.BR devlink-port (8), ++.BR devlink-sb (8), ++.BR devlink-monitor (8), ++.br ++ ++.SH AUTHOR ++Arkadi Sharshevsky +diff --git a/man/man8/devlink.8 b/man/man8/devlink.8 +index a480766cbbdbe..6bf398274a612 100644 +--- a/man/man8/devlink.8 ++++ b/man/man8/devlink.8 +@@ -87,6 +87,7 @@ Exit status is 0 if command was successful or a positive integer upon failure. 
+ .BR devlink-port (8), + .BR devlink-monitor (8), + .BR devlink-sb (8), ++.BR devlink-resource (8), + .br + + .SH REPORTING BUGS +-- +2.20.1 + diff --git a/SOURCES/0059-devlink-Add-param-command-support.patch b/SOURCES/0059-devlink-Add-param-command-support.patch new file mode 100644 index 0000000..e9a174d --- /dev/null +++ b/SOURCES/0059-devlink-Add-param-command-support.patch @@ -0,0 +1,704 @@ +From 8bd31b6df5fd1da612accbb4d131b3c3bcded079 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 25 Mar 2019 11:40:58 +0100 +Subject: [PATCH] devlink: Add param command support + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1644731 +Upstream Status: iproute2.git commit 13925ae9eb38b +Conflicts: context change due to missing commit 844646a52837f + ("devlink: Change empty line indication with indentations") + +commit 13925ae9eb38b99107be1d3fe21a1b73cf40bd97 +Author: Moshe Shemesh +Date: Wed Jul 4 17:12:06 2018 +0300 + + devlink: Add param command support + + Add support for configuration parameters set and show. + Each parameter can be either generic or driver-specific. + The user can retrieve data on these configuration parameters by devlink + param show command and can set new value to a configuration parameter + by devlink param set command. + The configuration parameters can be set in different configuration + modes: + runtime - set while driver is running, no reset required. + driverinit - applied while driver initializes, requires restart + driver by devlink reload command. + permanent - written to device's non-volatile memory, hard reset + required to apply. 
+ + New commands added: + devlink dev param show [DEV name PARAMETER] + devlink dev param set DEV name PARAMETER value VALUE + cmode { permanent | driverinit | runtime } + + Signed-off-by: Moshe Shemesh + Signed-off-by: Jiri Pirko + Signed-off-by: David Ahern +--- + devlink/devlink.c | 454 +++++++++++++++++++++++++++++++++++++++++ + man/man8/devlink-dev.8 | 57 ++++++ + 2 files changed, 511 insertions(+) + +diff --git a/devlink/devlink.c b/devlink/devlink.c +index fc3939e564bc8..92e78c9c8d9f6 100644 +--- a/devlink/devlink.c ++++ b/devlink/devlink.c +@@ -33,6 +33,10 @@ + #define ESWITCH_INLINE_MODE_NETWORK "network" + #define ESWITCH_INLINE_MODE_TRANSPORT "transport" + ++#define PARAM_CMODE_RUNTIME_STR "runtime" ++#define PARAM_CMODE_DRIVERINIT_STR "driverinit" ++#define PARAM_CMODE_PERMANENT_STR "permanent" ++ + #define pr_err(args...) fprintf(stderr, ##args) + #define pr_out(args...) \ + do { \ +@@ -179,6 +183,9 @@ static void ifname_map_free(struct ifname_map *ifname_map) + #define DL_OPT_ESWITCH_ENCAP_MODE BIT(15) + #define DL_OPT_RESOURCE_PATH BIT(16) + #define DL_OPT_RESOURCE_SIZE BIT(17) ++#define DL_OPT_PARAM_NAME BIT(18) ++#define DL_OPT_PARAM_VALUE BIT(19) ++#define DL_OPT_PARAM_CMODE BIT(20) + + struct dl_opts { + uint32_t present; /* flags of present items */ +@@ -203,6 +210,9 @@ struct dl_opts { + uint32_t resource_size; + uint32_t resource_id; + bool resource_id_valid; ++ const char *param_name; ++ const char *param_value; ++ enum devlink_param_cmode cmode; + }; + + struct dl { +@@ -340,6 +350,12 @@ static const enum mnl_attr_data_type devlink_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_FIELD_ID] = MNL_TYPE_U32, + [DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH] = MNL_TYPE_U32, + [DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE] = MNL_TYPE_U32, ++ [DEVLINK_ATTR_PARAM] = MNL_TYPE_NESTED, ++ [DEVLINK_ATTR_PARAM_NAME] = MNL_TYPE_STRING, ++ [DEVLINK_ATTR_PARAM_TYPE] = MNL_TYPE_U8, ++ [DEVLINK_ATTR_PARAM_VALUES_LIST] = MNL_TYPE_NESTED, ++ [DEVLINK_ATTR_PARAM_VALUE] = 
MNL_TYPE_NESTED, ++ [DEVLINK_ATTR_PARAM_VALUE_CMODE] = MNL_TYPE_U8, + }; + + static int attr_cb(const struct nlattr *attr, void *data) +@@ -506,6 +522,34 @@ static int strtouint16_t(const char *str, uint16_t *p_val) + return 0; + } + ++static int strtouint8_t(const char *str, uint8_t *p_val) ++{ ++ char *endptr; ++ unsigned long int val; ++ ++ val = strtoul(str, &endptr, 10); ++ if (endptr == str || *endptr != '\0') ++ return -EINVAL; ++ if (val > UCHAR_MAX) ++ return -ERANGE; ++ *p_val = val; ++ return 0; ++} ++ ++static int strtobool(const char *str, bool *p_val) ++{ ++ bool val; ++ ++ if (!strcmp(str, "true") || !strcmp(str, "1")) ++ val = true; ++ else if (!strcmp(str, "false") || !strcmp(str, "0")) ++ val = false; ++ else ++ return -EINVAL; ++ *p_val = val; ++ return 0; ++} ++ + static int __dl_argv_handle(char *str, char **p_bus_name, char **p_dev_name) + { + strslashrsplit(str, p_bus_name, p_dev_name); +@@ -776,6 +820,22 @@ static int eswitch_encap_mode_get(const char *typestr, bool *p_mode) + return 0; + } + ++static int param_cmode_get(const char *cmodestr, ++ enum devlink_param_cmode *cmode) ++{ ++ if (strcmp(cmodestr, PARAM_CMODE_RUNTIME_STR) == 0) { ++ *cmode = DEVLINK_PARAM_CMODE_RUNTIME; ++ } else if (strcmp(cmodestr, PARAM_CMODE_DRIVERINIT_STR) == 0) { ++ *cmode = DEVLINK_PARAM_CMODE_DRIVERINIT; ++ } else if (strcmp(cmodestr, PARAM_CMODE_PERMANENT_STR) == 0) { ++ *cmode = DEVLINK_PARAM_CMODE_PERMANENT; ++ } else { ++ pr_err("Unknown configuration mode \"%s\"\n", cmodestr); ++ return -EINVAL; ++ } ++ return 0; ++} ++ + static int dl_argv_parse(struct dl *dl, uint32_t o_required, + uint32_t o_optional) + { +@@ -957,6 +1017,32 @@ static int dl_argv_parse(struct dl *dl, uint32_t o_required, + if (err) + return err; + o_found |= DL_OPT_RESOURCE_SIZE; ++ } else if (dl_argv_match(dl, "name") && ++ (o_all & DL_OPT_PARAM_NAME)) { ++ dl_arg_inc(dl); ++ err = dl_argv_str(dl, &opts->param_name); ++ if (err) ++ return err; ++ o_found |= DL_OPT_PARAM_NAME; ++ } 
else if (dl_argv_match(dl, "value") && ++ (o_all & DL_OPT_PARAM_VALUE)) { ++ dl_arg_inc(dl); ++ err = dl_argv_str(dl, &opts->param_value); ++ if (err) ++ return err; ++ o_found |= DL_OPT_PARAM_VALUE; ++ } else if (dl_argv_match(dl, "cmode") && ++ (o_all & DL_OPT_PARAM_CMODE)) { ++ const char *cmodestr; ++ ++ dl_arg_inc(dl); ++ err = dl_argv_str(dl, &cmodestr); ++ if (err) ++ return err; ++ err = param_cmode_get(cmodestr, &opts->cmode); ++ if (err) ++ return err; ++ o_found |= DL_OPT_PARAM_CMODE; + } else { + pr_err("Unknown option \"%s\"\n", dl_argv(dl)); + return -EINVAL; +@@ -1041,6 +1127,24 @@ static int dl_argv_parse(struct dl *dl, uint32_t o_required, + return -EINVAL; + } + ++ if ((o_required & DL_OPT_PARAM_NAME) && ++ !(o_found & DL_OPT_PARAM_NAME)) { ++ pr_err("Parameter name expected.\n"); ++ return -EINVAL; ++ } ++ ++ if ((o_required & DL_OPT_PARAM_VALUE) && ++ !(o_found & DL_OPT_PARAM_VALUE)) { ++ pr_err("Value to set expected.\n"); ++ return -EINVAL; ++ } ++ ++ if ((o_required & DL_OPT_PARAM_CMODE) && ++ !(o_found & DL_OPT_PARAM_CMODE)) { ++ pr_err("Configuration mode expected.\n"); ++ return -EINVAL; ++ } ++ + return 0; + } + +@@ -1105,6 +1209,12 @@ static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl) + if (opts->present & DL_OPT_RESOURCE_SIZE) + mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_SIZE, + opts->resource_size); ++ if (opts->present & DL_OPT_PARAM_NAME) ++ mnl_attr_put_strz(nlh, DEVLINK_ATTR_PARAM_NAME, ++ opts->param_name); ++ if (opts->present & DL_OPT_PARAM_CMODE) ++ mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, ++ opts->cmode); + } + + static int dl_argv_parse_put(struct nlmsghdr *nlh, struct dl *dl, +@@ -1163,6 +1273,8 @@ static void cmd_dev_help(void) + pr_err(" [ inline-mode { none | link | network | transport } ]\n"); + pr_err(" [ encap { disable | enable } ]\n"); + pr_err(" devlink dev eswitch show DEV\n"); ++ pr_err(" devlink dev param set DEV name PARAMETER value VALUE cmode { permanent | driverinit | runtime }\n"); ++ 
pr_err(" devlink dev param show [DEV name PARAMETER]\n"); + pr_err(" devlink dev reload DEV\n"); + } + +@@ -1377,6 +1489,14 @@ static void pr_out_str(struct dl *dl, const char *name, const char *val) + } + } + ++static void pr_out_bool(struct dl *dl, const char *name, bool val) ++{ ++ if (val) ++ pr_out_str(dl, name, "true"); ++ else ++ pr_out_str(dl, name, "false"); ++} ++ + static void pr_out_uint(struct dl *dl, const char *name, unsigned int val) + { + if (dl->json_output) { +@@ -1449,6 +1569,19 @@ static void pr_out_entry_end(struct dl *dl) + __pr_out_newline(); + } + ++static const char *param_cmode_name(uint8_t cmode) ++{ ++ switch (cmode) { ++ case DEVLINK_PARAM_CMODE_RUNTIME: ++ return PARAM_CMODE_RUNTIME_STR; ++ case DEVLINK_PARAM_CMODE_DRIVERINIT: ++ return PARAM_CMODE_DRIVERINIT_STR; ++ case DEVLINK_PARAM_CMODE_PERMANENT: ++ return PARAM_CMODE_PERMANENT_STR; ++ default: return ""; ++ } ++} ++ + static const char *eswitch_mode_name(uint32_t mode) + { + switch (mode) { +@@ -1567,6 +1700,304 @@ static int cmd_dev_eswitch(struct dl *dl) + return -ENOENT; + } + ++static void pr_out_param_value(struct dl *dl, int nla_type, struct nlattr *nl) ++{ ++ struct nlattr *nla_value[DEVLINK_ATTR_MAX + 1] = {}; ++ struct nlattr *val_attr; ++ int err; ++ ++ err = mnl_attr_parse_nested(nl, attr_cb, nla_value); ++ if (err != MNL_CB_OK) ++ return; ++ ++ if (!nla_value[DEVLINK_ATTR_PARAM_VALUE_CMODE] || ++ (nla_type != MNL_TYPE_FLAG && ++ !nla_value[DEVLINK_ATTR_PARAM_VALUE_DATA])) ++ return; ++ ++ pr_out_str(dl, "cmode", ++ param_cmode_name(mnl_attr_get_u8(nla_value[DEVLINK_ATTR_PARAM_VALUE_CMODE]))); ++ val_attr = nla_value[DEVLINK_ATTR_PARAM_VALUE_DATA]; ++ ++ switch (nla_type) { ++ case MNL_TYPE_U8: ++ pr_out_uint(dl, "value", mnl_attr_get_u8(val_attr)); ++ break; ++ case MNL_TYPE_U16: ++ pr_out_uint(dl, "value", mnl_attr_get_u16(val_attr)); ++ break; ++ case MNL_TYPE_U32: ++ pr_out_uint(dl, "value", mnl_attr_get_u32(val_attr)); ++ break; ++ case MNL_TYPE_STRING: ++ 
pr_out_str(dl, "value", mnl_attr_get_str(val_attr)); ++ break; ++ case MNL_TYPE_FLAG: ++ pr_out_bool(dl, "value", val_attr ? true : false); ++ break; ++ } ++} ++ ++static void pr_out_param(struct dl *dl, struct nlattr **tb, bool array) ++{ ++ struct nlattr *nla_param[DEVLINK_ATTR_MAX + 1] = {}; ++ struct nlattr *param_value_attr; ++ int nla_type; ++ int err; ++ ++ err = mnl_attr_parse_nested(tb[DEVLINK_ATTR_PARAM], attr_cb, nla_param); ++ if (err != MNL_CB_OK) ++ return; ++ if (!nla_param[DEVLINK_ATTR_PARAM_NAME] || ++ !nla_param[DEVLINK_ATTR_PARAM_TYPE] || ++ !nla_param[DEVLINK_ATTR_PARAM_VALUES_LIST]) ++ return; ++ ++ if (array) ++ pr_out_handle_start_arr(dl, tb); ++ else ++ __pr_out_handle_start(dl, tb, true, false); ++ ++ nla_type = mnl_attr_get_u8(nla_param[DEVLINK_ATTR_PARAM_TYPE]); ++ ++ pr_out_str(dl, "name", ++ mnl_attr_get_str(nla_param[DEVLINK_ATTR_PARAM_NAME])); ++ ++ if (!nla_param[DEVLINK_ATTR_PARAM_GENERIC]) ++ pr_out_str(dl, "type", "driver-specific"); ++ else ++ pr_out_str(dl, "type", "generic"); ++ ++ pr_out_array_start(dl, "values"); ++ mnl_attr_for_each_nested(param_value_attr, ++ nla_param[DEVLINK_ATTR_PARAM_VALUES_LIST]) { ++ pr_out_entry_start(dl); ++ pr_out_param_value(dl, nla_type, param_value_attr); ++ pr_out_entry_end(dl); ++ } ++ pr_out_array_end(dl); ++ pr_out_handle_end(dl); ++} ++ ++static int cmd_dev_param_show_cb(const struct nlmsghdr *nlh, void *data) ++{ ++ struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); ++ struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; ++ struct dl *dl = data; ++ ++ mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); ++ if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || ++ !tb[DEVLINK_ATTR_PARAM]) ++ return MNL_CB_ERROR; ++ pr_out_param(dl, tb, true); ++ return MNL_CB_OK; ++} ++ ++struct param_ctx { ++ struct dl *dl; ++ int nla_type; ++ union { ++ uint8_t vu8; ++ uint16_t vu16; ++ uint32_t vu32; ++ const char *vstr; ++ bool vbool; ++ } value; ++}; ++ ++static int cmd_dev_param_set_cb(const struct 
nlmsghdr *nlh, void *data) ++{ ++ struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); ++ struct nlattr *nla_param[DEVLINK_ATTR_MAX + 1] = {}; ++ struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; ++ struct nlattr *param_value_attr; ++ enum devlink_param_cmode cmode; ++ struct param_ctx *ctx = data; ++ struct dl *dl = ctx->dl; ++ int nla_type; ++ int err; ++ ++ mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); ++ if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || ++ !tb[DEVLINK_ATTR_PARAM]) ++ return MNL_CB_ERROR; ++ ++ err = mnl_attr_parse_nested(tb[DEVLINK_ATTR_PARAM], attr_cb, nla_param); ++ if (err != MNL_CB_OK) ++ return MNL_CB_ERROR; ++ ++ if (!nla_param[DEVLINK_ATTR_PARAM_TYPE] || ++ !nla_param[DEVLINK_ATTR_PARAM_VALUES_LIST]) ++ return MNL_CB_ERROR; ++ ++ nla_type = mnl_attr_get_u8(nla_param[DEVLINK_ATTR_PARAM_TYPE]); ++ mnl_attr_for_each_nested(param_value_attr, ++ nla_param[DEVLINK_ATTR_PARAM_VALUES_LIST]) { ++ struct nlattr *nla_value[DEVLINK_ATTR_MAX + 1] = {}; ++ struct nlattr *val_attr; ++ ++ err = mnl_attr_parse_nested(param_value_attr, ++ attr_cb, nla_value); ++ if (err != MNL_CB_OK) ++ return MNL_CB_ERROR; ++ ++ if (!nla_value[DEVLINK_ATTR_PARAM_VALUE_CMODE] || ++ (nla_type != MNL_TYPE_FLAG && ++ !nla_value[DEVLINK_ATTR_PARAM_VALUE_DATA])) ++ return MNL_CB_ERROR; ++ ++ cmode = mnl_attr_get_u8(nla_value[DEVLINK_ATTR_PARAM_VALUE_CMODE]); ++ if (cmode == dl->opts.cmode) { ++ val_attr = nla_value[DEVLINK_ATTR_PARAM_VALUE_DATA]; ++ switch (nla_type) { ++ case MNL_TYPE_U8: ++ ctx->value.vu8 = mnl_attr_get_u8(val_attr); ++ break; ++ case MNL_TYPE_U16: ++ ctx->value.vu16 = mnl_attr_get_u16(val_attr); ++ break; ++ case MNL_TYPE_U32: ++ ctx->value.vu32 = mnl_attr_get_u32(val_attr); ++ break; ++ case MNL_TYPE_STRING: ++ ctx->value.vstr = mnl_attr_get_str(val_attr); ++ break; ++ case MNL_TYPE_FLAG: ++ ctx->value.vbool = val_attr ? 
true : false; ++ break; ++ } ++ break; ++ } ++ } ++ ctx->nla_type = nla_type; ++ return MNL_CB_OK; ++} ++ ++static int cmd_dev_param_set(struct dl *dl) ++{ ++ struct param_ctx ctx = {}; ++ struct nlmsghdr *nlh; ++ uint32_t val_u32; ++ uint16_t val_u16; ++ uint8_t val_u8; ++ bool val_bool; ++ int err; ++ ++ err = dl_argv_parse(dl, DL_OPT_HANDLE | ++ DL_OPT_PARAM_NAME | ++ DL_OPT_PARAM_VALUE | ++ DL_OPT_PARAM_CMODE, 0); ++ if (err) ++ return err; ++ ++ /* Get value type */ ++ nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PARAM_GET, ++ NLM_F_REQUEST | NLM_F_ACK); ++ dl_opts_put(nlh, dl); ++ ++ ctx.dl = dl; ++ err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dev_param_set_cb, &ctx); ++ if (err) ++ return err; ++ ++ nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PARAM_SET, ++ NLM_F_REQUEST | NLM_F_ACK); ++ dl_opts_put(nlh, dl); ++ ++ mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_TYPE, ctx.nla_type); ++ switch (ctx.nla_type) { ++ case MNL_TYPE_U8: ++ err = strtouint8_t(dl->opts.param_value, &val_u8); ++ if (err) ++ goto err_param_value_parse; ++ if (val_u8 == ctx.value.vu8) ++ return 0; ++ mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, val_u8); ++ break; ++ case MNL_TYPE_U16: ++ err = strtouint16_t(dl->opts.param_value, &val_u16); ++ if (err) ++ goto err_param_value_parse; ++ if (val_u16 == ctx.value.vu16) ++ return 0; ++ mnl_attr_put_u16(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, val_u16); ++ break; ++ case MNL_TYPE_U32: ++ err = strtouint32_t(dl->opts.param_value, &val_u32); ++ if (err) ++ goto err_param_value_parse; ++ if (val_u32 == ctx.value.vu32) ++ return 0; ++ mnl_attr_put_u32(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, val_u32); ++ break; ++ case MNL_TYPE_FLAG: ++ err = strtobool(dl->opts.param_value, &val_bool); ++ if (err) ++ goto err_param_value_parse; ++ if (val_bool == ctx.value.vbool) ++ return 0; ++ if (val_bool) ++ mnl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, ++ 0, NULL); ++ break; ++ case MNL_TYPE_STRING: ++ mnl_attr_put_strz(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, ++ 
dl->opts.param_value); ++ if (!strcmp(dl->opts.param_value, ctx.value.vstr)) ++ return 0; ++ break; ++ default: ++ printf("Value type not supported\n"); ++ return -ENOTSUP; ++ } ++ return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); ++ ++err_param_value_parse: ++ pr_err("Value \"%s\" is not a number or not within range\n", ++ dl->opts.param_value); ++ return err; ++} ++ ++static int cmd_dev_param_show(struct dl *dl) ++{ ++ uint16_t flags = NLM_F_REQUEST | NLM_F_ACK; ++ struct nlmsghdr *nlh; ++ int err; ++ ++ if (dl_argc(dl) == 0) ++ flags |= NLM_F_DUMP; ++ ++ nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PARAM_GET, flags); ++ ++ if (dl_argc(dl) > 0) { ++ err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE | ++ DL_OPT_PARAM_NAME, 0); ++ if (err) ++ return err; ++ } ++ ++ pr_out_section_start(dl, "param"); ++ err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dev_param_show_cb, dl); ++ pr_out_section_end(dl); ++ return err; ++} ++ ++static int cmd_dev_param(struct dl *dl) ++{ ++ if (dl_argv_match(dl, "help")) { ++ cmd_dev_help(); ++ return 0; ++ } else if (dl_argv_match(dl, "show") || ++ dl_argv_match(dl, "list") || dl_no_arg(dl)) { ++ dl_arg_inc(dl); ++ return cmd_dev_param_show(dl); ++ } else if (dl_argv_match(dl, "set")) { ++ dl_arg_inc(dl); ++ return cmd_dev_param_set(dl); ++ } ++ pr_err("Command \"%s\" not found\n", dl_argv(dl)); ++ return -ENOENT; ++} + static int cmd_dev_show_cb(const struct nlmsghdr *nlh, void *data) + { + struct dl *dl = data; +@@ -1643,6 +2074,9 @@ static int cmd_dev(struct dl *dl) + } else if (dl_argv_match(dl, "reload")) { + dl_arg_inc(dl); + return cmd_dev_reload(dl); ++ } else if (dl_argv_match(dl, "param")) { ++ dl_arg_inc(dl); ++ return cmd_dev_param(dl); + } + pr_err("Command \"%s\" not found\n", dl_argv(dl)); + return -ENOENT; +@@ -2586,6 +3020,10 @@ static const char *cmd_name(uint8_t cmd) + case DEVLINK_CMD_PORT_SET: return "set"; + case DEVLINK_CMD_PORT_NEW: return "net"; + case DEVLINK_CMD_PORT_DEL: return "del"; ++ case 
DEVLINK_CMD_PARAM_GET: return "get"; ++ case DEVLINK_CMD_PARAM_SET: return "set"; ++ case DEVLINK_CMD_PARAM_NEW: return "new"; ++ case DEVLINK_CMD_PARAM_DEL: return "del"; + default: return ""; + } + } +@@ -2604,6 +3042,11 @@ static const char *cmd_obj(uint8_t cmd) + case DEVLINK_CMD_PORT_NEW: + case DEVLINK_CMD_PORT_DEL: + return "port"; ++ case DEVLINK_CMD_PARAM_GET: ++ case DEVLINK_CMD_PARAM_SET: ++ case DEVLINK_CMD_PARAM_NEW: ++ case DEVLINK_CMD_PARAM_DEL: ++ return "param"; + default: return ""; + } + } +@@ -2660,6 +3103,17 @@ static int cmd_mon_show_cb(const struct nlmsghdr *nlh, void *data) + pr_out_mon_header(genl->cmd); + pr_out_port(dl, tb); + break; ++ case DEVLINK_CMD_PARAM_GET: /* fall through */ ++ case DEVLINK_CMD_PARAM_SET: /* fall through */ ++ case DEVLINK_CMD_PARAM_NEW: /* fall through */ ++ case DEVLINK_CMD_PARAM_DEL: ++ mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); ++ if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || ++ !tb[DEVLINK_ATTR_PARAM]) ++ return MNL_CB_ERROR; ++ pr_out_mon_header(genl->cmd); ++ pr_out_param(dl, tb, false); ++ break; + } + return MNL_CB_OK; + } +diff --git a/man/man8/devlink-dev.8 b/man/man8/devlink-dev.8 +index 7c749ddabaeeb..d985da172aa05 100644 +--- a/man/man8/devlink-dev.8 ++++ b/man/man8/devlink-dev.8 +@@ -42,6 +42,23 @@ devlink-dev \- devlink device configuration + .BR "devlink dev eswitch show" + .IR DEV + ++.ti -8 ++.BR "devlink dev param set" ++.IR DEV ++.BR name ++.IR PARAMETER ++.BR value ++.IR VALUE ++.BR cmode " { " runtime " | " driverinit " | " permanent " } " ++ ++.ti -8 ++.BR "devlink dev param show" ++.RI "[ " ++.IR DEV ++.BR name ++.IR PARAMETER ++.RI "]" ++ + .ti -8 + .BR "devlink dev reload" + .IR DEV +@@ -98,6 +115,36 @@ Set eswitch encapsulation support + .I enable + - Enable encapsulation support + ++.SS devlink dev param set - set new value to devlink device configuration parameter ++ ++.TP ++.BI name " PARAMETER" ++Specify parameter name to set. 
++ ++.TP ++.BI value " VALUE" ++New value to set. ++ ++.TP ++.BR cmode " { " runtime " | " driverinit " | " permanent " } " ++Configuration mode in which the new value is set. ++ ++.I runtime ++- Set new value while driver is running. This configuration mode doesn't require any reset to apply the new value. ++ ++.I driverinit ++- Set new value which will be applied during driver initialization. This configuration mode requires restart driver by devlink reload command to apply the new value. ++ ++.I permanent ++- New value is written to device's non-volatile memory. This configuration mode requires hard reset to apply the new value. ++ ++.SS devlink dev param show - display devlink device supported configuration parameters attributes ++ ++.BR name ++.IR PARAMETER ++Specify parameter name to show. ++If this argument is omitted all parameters supported by devlink devices are listed. ++ + .SS devlink dev reload - perform hot reload of the driver. + + .PP +@@ -126,6 +173,16 @@ devlink dev eswitch set pci/0000:01:00.0 mode switchdev + Sets the eswitch mode of specified devlink device to switchdev. + .RE + .PP ++devlink dev param show pci/0000:01:00.0 name max_macs ++.RS 4 ++Shows the parameter max_macs attributes. ++.RE ++.PP ++devlink dev param set pci/0000:01:00.0 name internal_error_reset value true cmode runtime ++.RS 4 ++Sets the parameter internal_error_reset of specified devlink device to true. ++.RE ++.PP + devlink dev reload pci/0000:01:00.0 + .RS 4 + Performs hot reload of specified devlink device. 
+-- +2.20.1 + diff --git a/SOURCES/0060-man-ip-route.8-ssthresh-parameter-is-NUMBER.patch b/SOURCES/0060-man-ip-route.8-ssthresh-parameter-is-NUMBER.patch new file mode 100644 index 0000000..e1bbaf3 --- /dev/null +++ b/SOURCES/0060-man-ip-route.8-ssthresh-parameter-is-NUMBER.patch @@ -0,0 +1,38 @@ +From b6de96b49af2751003d67ed4edbf91fd25cee19c Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 25 Mar 2019 11:52:31 +0100 +Subject: [PATCH] man: ip-route.8: ssthresh parameter is NUMBER + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1593628 +Upstream Status: iproute2.git commit 79f49f58aaefe + +commit 79f49f58aaefe11f677c8e072557b834a19f47f3 +Author: Phil Sutter +Date: Thu Mar 22 15:00:38 2018 +0100 + + man: ip-route.8: ssthresh parameter is NUMBER + + Synopsis section was inconsistent with regards to help text and later + description of ssthresh parameter. + + Signed-off-by: Phil Sutter +--- + man/man8/ip-route.8.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in +index d9a547748c017..0616cf01740f3 100644 +--- a/man/man8/ip-route.8.in ++++ b/man/man8/ip-route.8.in +@@ -120,7 +120,7 @@ replace " } " + .B cwnd + .IR NUMBER " ] [ " + .B ssthresh +-.IR REALM " ] [ " ++.IR NUMBER " ] [ " + .B realms + .IR REALM " ] [ " + .B rto_min +-- +2.20.1 + diff --git a/SOURCES/0061-man-tc-vlan.8-Fix-for-incorrect-example.patch b/SOURCES/0061-man-tc-vlan.8-Fix-for-incorrect-example.patch new file mode 100644 index 0000000..b299549 --- /dev/null +++ b/SOURCES/0061-man-tc-vlan.8-Fix-for-incorrect-example.patch @@ -0,0 +1,39 @@ +From 58fe50b2e23c1b77ed93d242545d0f274f819681 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 25 Mar 2019 11:57:41 +0100 +Subject: [PATCH] man: tc-vlan.8: Fix for incorrect example + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1593630 +Upstream Status: iproute2.git commit 8ee38d833ccb1 + +commit 8ee38d833ccb1863f06634e12c5236b0ef7c2d76 +Author: Phil Sutter 
+Date: Fri Mar 23 21:18:56 2018 +0100 + + man: tc-vlan.8: Fix for incorrect example + + This has to be a second match statement to the same u32 filter, not a + second one (which tc-filter doesn't support at all). + + Signed-off-by: Phil Sutter + Signed-off-by: Stephen Hemminger +--- + man/man8/tc-vlan.8 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/man/man8/tc-vlan.8 b/man/man8/tc-vlan.8 +index af3de1c54e343..a526f66b60b4c 100644 +--- a/man/man8/tc-vlan.8 ++++ b/man/man8/tc-vlan.8 +@@ -103,7 +103,7 @@ into VLAN ID 123: + #tc qdisc add dev eth0 handle ffff: ingress + #tc filter add dev eth0 parent ffff: pref 11 protocol ip \\ + u32 match ip protocol 1 0xff flowid 1:1 \\ +- u32 match ip src 10.0.0.2 flowid 1:1 \\ ++ match ip src 10.0.0.2 flowid 1:1 \\ + action vlan push id 123 + .EE + .RE +-- +2.20.1 + diff --git a/SOURCES/0062-tc-flower-Add-support-for-QinQ.patch b/SOURCES/0062-tc-flower-Add-support-for-QinQ.patch new file mode 100644 index 0000000..f17a40b --- /dev/null +++ b/SOURCES/0062-tc-flower-Add-support-for-QinQ.patch @@ -0,0 +1,271 @@ +From dfbec1b67fc02a5af0d5cc30328b918902f20717 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 25 Mar 2019 12:19:05 +0100 +Subject: [PATCH] tc: flower: Add support for QinQ + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1642347 +Upstream Status: iproute2.git commit 1f0a5dfd388cd +Conflicts: context change due to missing commits + 7638ee13c1586 ("tc: flower: support for matching MPLS labels") + e28b88a464c49 ("tc: jsonify flower filter") + also adjust code to use fprintf instead of print_string due to + missing commit d0e720111aad2 ("ip: ipaddress.c: add support for json output") + +commit 1f0a5dfd388cd5c25f6a24247667e04b2346e568 +Author: Jianbo Liu +Date: Sat Jun 30 10:01:33 2018 +0000 + + tc: flower: Add support for QinQ + + To support matching on both outer and inner vlan headers, + we add new cvlan_id/cvlan_prio/cvlan_ethtype for inner vlan header. 
+ + Example: + # tc filter add dev eth0 protocol 802.1ad parent ffff: \ + flower vlan_id 1000 vlan_ethtype 802.1q \ + cvlan_id 100 cvlan_ethtype ipv4 \ + action vlan pop \ + action vlan pop \ + action mirred egress redirect dev eth1 + + # tc filter show dev eth0 ingress + filter protocol 802.1ad pref 1 flower chain 0 + filter protocol 802.1ad pref 1 flower chain 0 handle 0x1 +   vlan_id 1000 +   vlan_ethtype 802.1Q +   cvlan_id 100 +   cvlan_ethtype ip +   eth_type ipv4 +   in_hw + + Signed-off-by: Jianbo Liu + Acked-by: Jiri Pirko + Signed-off-by: David Ahern +--- + man/man8/tc-flower.8 | 23 ++++++++++ + tc/f_flower.c | 99 ++++++++++++++++++++++++++++++++++++++------ + 2 files changed, 110 insertions(+), 12 deletions(-) + +diff --git a/man/man8/tc-flower.8 b/man/man8/tc-flower.8 +index af19708d9649e..387f73f5cd2e9 100644 +--- a/man/man8/tc-flower.8 ++++ b/man/man8/tc-flower.8 +@@ -29,6 +29,12 @@ flower \- flow based traffic control filter + .IR PRIORITY " | " + .BR vlan_ethtype " { " ipv4 " | " ipv6 " | " + .IR ETH_TYPE " } | " ++.B cvlan_id ++.IR VID " | " ++.B cvlan_prio ++.IR PRIORITY " | " ++.BR cvlan_ethtype " { " ipv4 " | " ipv6 " | " ++.IR ETH_TYPE " } | " + .BR ip_proto " { " tcp " | " udp " | " sctp " | " icmp " | " icmpv6 " | " + .IR IP_PROTO " } | " + .B ip_tos +@@ -121,6 +127,23 @@ Match on layer three protocol. + .I VLAN_ETH_TYPE + may be either + .BR ipv4 ", " ipv6 ++or an unsigned 16bit value in hexadecimal format. To match on QinQ packet, it must be 802.1Q or 802.1AD. ++.TP ++.BI cvlan_id " VID" ++Match on QinQ inner vlan tag id. ++.I VID ++is an unsigned 12bit value in decimal format. ++.TP ++.BI cvlan_prio " PRIORITY" ++Match on QinQ inner vlan tag priority. ++.I PRIORITY ++is an unsigned 3bit value in decimal format. ++.TP ++.BI cvlan_ethtype " VLAN_ETH_TYPE" ++Match on QinQ layer three protocol. ++.I VLAN_ETH_TYPE ++may be either ++.BR ipv4 ", " ipv6 + or an unsigned 16bit value in hexadecimal format. 
+ .TP + .BI ip_proto " IP_PROTO" +diff --git a/tc/f_flower.c b/tc/f_flower.c +index 5f5236ca523f8..40dcfbd687a20 100644 +--- a/tc/f_flower.c ++++ b/tc/f_flower.c +@@ -50,6 +50,9 @@ static void explain(void) + " vlan_id VID |\n" + " vlan_prio PRIORITY |\n" + " vlan_ethtype [ ipv4 | ipv6 | ETH-TYPE ] |\n" ++ " cvlan_id VID |\n" ++ " cvlan_prio PRIORITY |\n" ++ " cvlan_ethtype [ ipv4 | ipv6 | ETH-TYPE ] |\n" + " dst_mac MASKED-LLADDR |\n" + " src_mac MASKED-LLADDR |\n" + " ip_proto [tcp | udp | sctp | icmp | icmpv6 | IP-PROTO ] |\n" +@@ -126,15 +129,21 @@ err: + return err; + } + ++static bool eth_type_vlan(__be16 ethertype) ++{ ++ return ethertype == htons(ETH_P_8021Q) || ++ ethertype == htons(ETH_P_8021AD); ++} ++ + static int flower_parse_vlan_eth_type(char *str, __be16 eth_type, int type, + __be16 *p_vlan_eth_type, + struct nlmsghdr *n) + { + __be16 vlan_eth_type; + +- if (eth_type != htons(ETH_P_8021Q)) { +- fprintf(stderr, +- "Can't set \"vlan_ethtype\" if ethertype isn't 802.1Q\n"); ++ if (!eth_type_vlan(eth_type)) { ++ fprintf(stderr, "Can't set \"%s\" if ethertype isn't 802.1Q or 802.1AD\n", ++ type == TCA_FLOWER_KEY_VLAN_ETH_TYPE ? 
"vlan_ethtype" : "cvlan_ethtype"); + return -1; + } + +@@ -583,6 +592,7 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, + struct rtattr *tail; + __be16 eth_type = TC_H_MIN(t->tcm_info); + __be16 vlan_ethtype = 0; ++ __be16 cvlan_ethtype = 0; + __u8 ip_proto = 0xff; + __u32 flags = 0; + __u32 mtf = 0; +@@ -640,9 +650,8 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, + __u16 vid; + + NEXT_ARG(); +- if (eth_type != htons(ETH_P_8021Q)) { +- fprintf(stderr, +- "Can't set \"vlan_id\" if ethertype isn't 802.1Q\n"); ++ if (!eth_type_vlan(eth_type)) { ++ fprintf(stderr, "Can't set \"vlan_id\" if ethertype isn't 802.1Q or 802.1AD\n"); + return -1; + } + ret = get_u16(&vid, *argv, 10); +@@ -655,9 +664,8 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, + __u8 vlan_prio; + + NEXT_ARG(); +- if (eth_type != htons(ETH_P_8021Q)) { +- fprintf(stderr, +- "Can't set \"vlan_prio\" if ethertype isn't 802.1Q\n"); ++ if (!eth_type_vlan(eth_type)) { ++ fprintf(stderr, "Can't set \"vlan_prio\" if ethertype isn't 802.1Q or 802.1AD\n"); + return -1; + } + ret = get_u8(&vlan_prio, *argv, 10); +@@ -674,6 +682,42 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, + &vlan_ethtype, n); + if (ret < 0) + return -1; ++ } else if (matches(*argv, "cvlan_id") == 0) { ++ __u16 vid; ++ ++ NEXT_ARG(); ++ if (!eth_type_vlan(vlan_ethtype)) { ++ fprintf(stderr, "Can't set \"cvlan_id\" if inner vlan ethertype isn't 802.1Q or 802.1AD\n"); ++ return -1; ++ } ++ ret = get_u16(&vid, *argv, 10); ++ if (ret < 0 || vid & ~0xfff) { ++ fprintf(stderr, "Illegal \"cvlan_id\"\n"); ++ return -1; ++ } ++ addattr16(n, MAX_MSG, TCA_FLOWER_KEY_CVLAN_ID, vid); ++ } else if (matches(*argv, "cvlan_prio") == 0) { ++ __u8 cvlan_prio; ++ ++ NEXT_ARG(); ++ if (!eth_type_vlan(vlan_ethtype)) { ++ fprintf(stderr, "Can't set \"cvlan_prio\" if inner vlan ethertype isn't 802.1Q or 802.1AD\n"); ++ return -1; ++ } ++ ret = get_u8(&cvlan_prio, *argv, 10); ++ if 
(ret < 0 || cvlan_prio & ~0x7) { ++ fprintf(stderr, "Illegal \"cvlan_prio\"\n"); ++ return -1; ++ } ++ addattr8(n, MAX_MSG, ++ TCA_FLOWER_KEY_CVLAN_PRIO, cvlan_prio); ++ } else if (matches(*argv, "cvlan_ethtype") == 0) { ++ NEXT_ARG(); ++ ret = flower_parse_vlan_eth_type(*argv, vlan_ethtype, ++ TCA_FLOWER_KEY_CVLAN_ETH_TYPE, ++ &cvlan_ethtype, n); ++ if (ret < 0) ++ return -1; + } else if (matches(*argv, "dst_mac") == 0) { + NEXT_ARG(); + ret = flower_parse_eth_addr(*argv, +@@ -696,7 +740,8 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, + } + } else if (matches(*argv, "ip_proto") == 0) { + NEXT_ARG(); +- ret = flower_parse_ip_proto(*argv, vlan_ethtype ? ++ ret = flower_parse_ip_proto(*argv, cvlan_ethtype ? ++ cvlan_ethtype : vlan_ethtype ? + vlan_ethtype : eth_type, + TCA_FLOWER_KEY_IP_PROTO, + &ip_proto, n); +@@ -726,7 +771,8 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, + } + } else if (matches(*argv, "dst_ip") == 0) { + NEXT_ARG(); +- ret = flower_parse_ip_addr(*argv, vlan_ethtype ? ++ ret = flower_parse_ip_addr(*argv, cvlan_ethtype ? ++ cvlan_ethtype : vlan_ethtype ? + vlan_ethtype : eth_type, + TCA_FLOWER_KEY_IPV4_DST, + TCA_FLOWER_KEY_IPV4_DST_MASK, +@@ -739,7 +785,8 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, + } + } else if (matches(*argv, "src_ip") == 0) { + NEXT_ARG(); +- ret = flower_parse_ip_addr(*argv, vlan_ethtype ? ++ ret = flower_parse_ip_addr(*argv, cvlan_ethtype ? ++ cvlan_ethtype : vlan_ethtype ? 
+ vlan_ethtype : eth_type, + TCA_FLOWER_KEY_IPV4_SRC, + TCA_FLOWER_KEY_IPV4_SRC_MASK, +@@ -1234,6 +1281,34 @@ static int flower_print_opt(struct filter_util *qu, FILE *f, + fprintf(f, "\n vlan_prio %d", rta_getattr_u8(attr)); + } + ++ if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) { ++ SPRINT_BUF(buf); ++ struct rtattr *attr = tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]; ++ ++ fprintf(f, "\n vlan_ethtype %s", ++ ll_proto_n2a(rta_getattr_u16(attr), buf, sizeof(buf))); ++ } ++ ++ if (tb[TCA_FLOWER_KEY_CVLAN_ID]) { ++ struct rtattr *attr = tb[TCA_FLOWER_KEY_CVLAN_ID]; ++ ++ fprintf(f, "\n cvlan_id %u", rta_getattr_u16(attr)); ++ } ++ ++ if (tb[TCA_FLOWER_KEY_CVLAN_PRIO]) { ++ struct rtattr *attr = tb[TCA_FLOWER_KEY_CVLAN_PRIO]; ++ ++ fprintf(f, "\n cvlan_prio %d", rta_getattr_u8(attr)); ++ } ++ ++ if (tb[TCA_FLOWER_KEY_CVLAN_ETH_TYPE]) { ++ SPRINT_BUF(buf); ++ struct rtattr *attr = tb[TCA_FLOWER_KEY_CVLAN_ETH_TYPE]; ++ ++ fprintf(f, "\n cvlan_ethtype %s", ++ ll_proto_n2a(rta_getattr_u16(attr), buf, sizeof(buf))); ++ } ++ + flower_print_eth_addr(f, "dst_mac", tb[TCA_FLOWER_KEY_ETH_DST], + tb[TCA_FLOWER_KEY_ETH_DST_MASK]); + flower_print_eth_addr(f, "src_mac", tb[TCA_FLOWER_KEY_ETH_SRC], +-- +2.20.1 + diff --git a/SOURCES/0063-utils-Move-BIT-macro-to-common-header.patch b/SOURCES/0063-utils-Move-BIT-macro-to-common-header.patch new file mode 100644 index 0000000..6bfee1c --- /dev/null +++ b/SOURCES/0063-utils-Move-BIT-macro-to-common-header.patch @@ -0,0 +1,61 @@ +From 42a457a97118379936cdeb20eef1d116e858d4c1 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 25 Mar 2019 13:30:09 +0100 +Subject: [PATCH] utils: Move BIT macro to common header + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1642479 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1641914 +Upstream Status: iproute2.git commit afdc119410efe + +commit afdc119410efe2a5e826c660446b1e4e1a72793d +Author: Leon Romanovsky +Date: Sun Aug 20 12:58:21 2017 +0300 + + utils: Move BIT macro to common header + + BIT() 
macro was implemented and used by devlink for now, but following + patches of rdmatool will reuse the same macro, so put it in common + header file. + + Signed-off-by: Leon Romanovsky +--- + devlink/devlink.c | 2 +- + include/utils.h | 2 ++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/devlink/devlink.c b/devlink/devlink.c +index 92e78c9c8d9f6..2000db81aabb0 100644 +--- a/devlink/devlink.c ++++ b/devlink/devlink.c +@@ -25,6 +25,7 @@ + #include "list.h" + #include "mnlg.h" + #include "json_writer.h" ++#include "utils.h" + + #define ESWITCH_MODE_LEGACY "legacy" + #define ESWITCH_MODE_SWITCHDEV "switchdev" +@@ -164,7 +165,6 @@ static void ifname_map_free(struct ifname_map *ifname_map) + free(ifname_map); + } + +-#define BIT(nr) (1UL << (nr)) + #define DL_OPT_HANDLE BIT(0) + #define DL_OPT_HANDLEP BIT(1) + #define DL_OPT_PORT_TYPE BIT(2) +diff --git a/include/utils.h b/include/utils.h +index 8c12e1e2a60c2..d707a9dacdb85 100644 +--- a/include/utils.h ++++ b/include/utils.h +@@ -208,6 +208,8 @@ static inline void __jiffies_to_tv(struct timeval *tv, unsigned long jiffies) + int print_timestamp(FILE *fp); + void print_nlmsg_timestamp(FILE *fp, const struct nlmsghdr *n); + ++#define BIT(nr) (1UL << (nr)) ++ + #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + + #define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)])) +-- +2.20.1 + diff --git a/SOURCES/0064-lib-make-resolve_hosts-variable-common.patch b/SOURCES/0064-lib-make-resolve_hosts-variable-common.patch new file mode 100644 index 0000000..529ffcb --- /dev/null +++ b/SOURCES/0064-lib-make-resolve_hosts-variable-common.patch @@ -0,0 +1,137 @@ +From c74a0bcb2e9c88b2ee166afc08574141a6a288b8 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 25 Mar 2019 13:30:43 +0100 +Subject: [PATCH] lib: make resolve_hosts variable common + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1642479 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1641914 +Upstream Status: iproute2.git 
commit 6648853975332 + +commit 6648853975332e5f34d03a1e2a6e09f5e1742099 +Author: Ivan Vecera +Date: Fri Nov 10 07:20:13 2017 +0100 + + lib: make resolve_hosts variable common + + Any iproute utility that uses any function from lib/utils.c needs + to declare its own resolve_hosts variable instance although it does + not need/use hostname resolving functionality (currently only 'ip' + and 'ss' commands uses this). + The patch declares single common instance of resolve_hosts directly + in utils.c so the existing ones can be removed (the same approach + that is used for timestamp_short). + + Cc: Jiri Pirko + Cc: Arkadi Sharshevsky + Signed-off-by: Ivan Vecera +--- + bridge/bridge.c | 1 - + genl/genl.c | 1 - + ip/ip.c | 1 - + ip/rtmon.c | 1 - + lib/utils.c | 1 + + misc/arpd.c | 2 -- + misc/ss.c | 1 - + tc/tc.c | 1 - + 8 files changed, 1 insertion(+), 8 deletions(-) + +diff --git a/bridge/bridge.c b/bridge/bridge.c +index 5ff038d672ad2..6658cb8fd801d 100644 +--- a/bridge/bridge.c ++++ b/bridge/bridge.c +@@ -18,7 +18,6 @@ + + struct rtnl_handle rth = { .fd = -1 }; + int preferred_family = AF_UNSPEC; +-int resolve_hosts; + int oneline; + int show_stats; + int show_details; +diff --git a/genl/genl.c b/genl/genl.c +index 747074b029a7b..7e4a208d449f2 100644 +--- a/genl/genl.c ++++ b/genl/genl.c +@@ -30,7 +30,6 @@ + int show_stats = 0; + int show_details = 0; + int show_raw = 0; +-int resolve_hosts = 0; + + static void *BODY; + static struct genl_util * genl_list; +diff --git a/ip/ip.c b/ip/ip.c +index 07050b07592ac..0c0ad1bde7cb6 100644 +--- a/ip/ip.c ++++ b/ip/ip.c +@@ -30,7 +30,6 @@ int human_readable; + int use_iec; + int show_stats; + int show_details; +-int resolve_hosts; + int oneline; + int brief; + int timestamp; +diff --git a/ip/rtmon.c b/ip/rtmon.c +index 1c2981f79d3d1..94baa38e3b7cb 100644 +--- a/ip/rtmon.c ++++ b/ip/rtmon.c +@@ -25,7 +25,6 @@ + #include "utils.h" + #include "libnetlink.h" + +-int resolve_hosts; + static int init_phase = 1; + + static void 
write_stamp(FILE *fp) +diff --git a/lib/utils.c b/lib/utils.c +index 9f55391d3c1ea..fc9c575ba0c7d 100644 +--- a/lib/utils.c ++++ b/lib/utils.c +@@ -35,6 +35,7 @@ + #include "utils.h" + #include "namespace.h" + ++int resolve_hosts; + int timestamp_short; + + int get_hex(char c) +diff --git a/misc/arpd.c b/misc/arpd.c +index bfab44544ee1d..c9d86475e5995 100644 +--- a/misc/arpd.c ++++ b/misc/arpd.c +@@ -38,8 +38,6 @@ + #include "utils.h" + #include "rt_names.h" + +-int resolve_hosts; +- + DB *dbase; + char *dbname = "/var/lib/arpd/arpd.db"; + +diff --git a/misc/ss.c b/misc/ss.c +index e92266539e6b5..c0cb33e96d9ec 100644 +--- a/misc/ss.c ++++ b/misc/ss.c +@@ -89,7 +89,6 @@ static int security_get_initial_context(char *name, char **context) + } + #endif + +-int resolve_hosts; + int resolve_services = 1; + int preferred_family = AF_UNSPEC; + int show_options; +diff --git a/tc/tc.c b/tc/tc.c +index 360c9f11c235b..11a364fabbbea 100644 +--- a/tc/tc.c ++++ b/tc/tc.c +@@ -39,7 +39,6 @@ int show_graph; + int timestamp; + + int batch_mode; +-int resolve_hosts; + int use_iec; + int force; + int ok; +-- +2.20.1 + diff --git a/SOURCES/0065-json_writer-add-new-json-handlers-null-float-with-fo.patch b/SOURCES/0065-json_writer-add-new-json-handlers-null-float-with-fo.patch new file mode 100644 index 0000000..cb5f307 --- /dev/null +++ b/SOURCES/0065-json_writer-add-new-json-handlers-null-float-with-fo.patch @@ -0,0 +1,161 @@ +From b4b11394d071810d694b66962e8d48cb866af473 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 25 Mar 2019 13:30:53 +0100 +Subject: [PATCH] json_writer: add new json handlers (null, float with format, + lluint, hu) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1642479 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1641914 +Upstream Status: iproute2.git commit 7252f16b2d191 + +commit 7252f16b2d1919b31f0a2ec094dd3516b1e33a55 +Author: Julien Fortin +Date: Thu Aug 17 10:35:50 2017 -0700 + + json_writer: add new json handlers (null, float 
with format, lluint, hu) + + Signed-off-by: Julien Fortin +--- + include/json_writer.h | 9 +++++++++ + lib/json_writer.c | 44 +++++++++++++++++++++++++++++++++++++++---- + 2 files changed, 49 insertions(+), 4 deletions(-) + +diff --git a/include/json_writer.h b/include/json_writer.h +index ab9a008a67994..1516aafba59df 100644 +--- a/include/json_writer.h ++++ b/include/json_writer.h +@@ -33,20 +33,29 @@ void jsonw_pretty(json_writer_t *self, bool on); + void jsonw_name(json_writer_t *self, const char *name); + + /* Add value */ ++void jsonw_printf(json_writer_t *self, const char *fmt, ...); + void jsonw_string(json_writer_t *self, const char *value); + void jsonw_bool(json_writer_t *self, bool value); + void jsonw_float(json_writer_t *self, double number); ++void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num); + void jsonw_uint(json_writer_t *self, uint64_t number); ++void jsonw_hu(json_writer_t *self, unsigned short number); + void jsonw_int(json_writer_t *self, int64_t number); + void jsonw_null(json_writer_t *self); ++void jsonw_lluint(json_writer_t *self, unsigned long long int num); + + /* Useful Combinations of name and value */ + void jsonw_string_field(json_writer_t *self, const char *prop, const char *val); + void jsonw_bool_field(json_writer_t *self, const char *prop, bool value); + void jsonw_float_field(json_writer_t *self, const char *prop, double num); + void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num); ++void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num); + void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num); + void jsonw_null_field(json_writer_t *self, const char *prop); ++void jsonw_lluint_field(json_writer_t *self, const char *prop, ++ unsigned long long int num); ++void jsonw_float_field_fmt(json_writer_t *self, const char *prop, ++ const char *fmt, double val); + + /* Collections */ + void jsonw_start_object(json_writer_t *self); +diff --git 
a/lib/json_writer.c b/lib/json_writer.c +index 9fc05e96b605f..6b77d288cce2b 100644 +--- a/lib/json_writer.c ++++ b/lib/json_writer.c +@@ -156,7 +156,7 @@ void jsonw_name(json_writer_t *self, const char *name) + putc(' ', self->out); + } + +-static void jsonw_printf(json_writer_t *self, const char *fmt, ...) ++void jsonw_printf(json_writer_t *self, const char *fmt, ...) + { + va_list ap; + +@@ -199,23 +199,38 @@ void jsonw_bool(json_writer_t *self, bool val) + jsonw_printf(self, "%s", val ? "true" : "false"); + } + +-#ifdef notused + void jsonw_null(json_writer_t *self) + { + jsonw_printf(self, "null"); + } + ++void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num) ++{ ++ jsonw_printf(self, fmt, num); ++} ++ ++#ifdef notused + void jsonw_float(json_writer_t *self, double num) + { + jsonw_printf(self, "%g", num); + } + #endif + ++void jsonw_hu(json_writer_t *self, unsigned short num) ++{ ++ jsonw_printf(self, "%hu", num); ++} ++ + void jsonw_uint(json_writer_t *self, uint64_t num) + { + jsonw_printf(self, "%"PRIu64, num); + } + ++void jsonw_lluint(json_writer_t *self, unsigned long long int num) ++{ ++ jsonw_printf(self, "%llu", num); ++} ++ + void jsonw_int(json_writer_t *self, int64_t num) + { + jsonw_printf(self, "%"PRId64, num); +@@ -242,25 +257,46 @@ void jsonw_float_field(json_writer_t *self, const char *prop, double val) + } + #endif + ++void jsonw_float_field_fmt(json_writer_t *self, ++ const char *prop, ++ const char *fmt, ++ double val) ++{ ++ jsonw_name(self, prop); ++ jsonw_float_fmt(self, fmt, val); ++} ++ + void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num) + { + jsonw_name(self, prop); + jsonw_uint(self, num); + } + ++void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num) ++{ ++ jsonw_name(self, prop); ++ jsonw_hu(self, num); ++} ++ ++void jsonw_lluint_field(json_writer_t *self, ++ const char *prop, ++ unsigned long long int num) ++{ ++ jsonw_name(self, prop); ++ jsonw_lluint(self, num); 
++} ++ + void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num) + { + jsonw_name(self, prop); + jsonw_int(self, num); + } + +-#ifdef notused + void jsonw_null_field(json_writer_t *self, const char *prop) + { + jsonw_name(self, prop); + jsonw_null(self); + } +-#endif + + #ifdef TEST + int main(int argc, char **argv) +-- +2.20.1 + diff --git a/SOURCES/0066-rdma-Add-MR-resource-tracking-information.patch b/SOURCES/0066-rdma-Add-MR-resource-tracking-information.patch new file mode 100644 index 0000000..76c39d0 --- /dev/null +++ b/SOURCES/0066-rdma-Add-MR-resource-tracking-information.patch @@ -0,0 +1,90 @@ +From a7f1b85b6838bdab705aef188bb0c86626bc3391 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 25 Mar 2019 13:31:02 +0100 +Subject: [PATCH] rdma: Add MR resource tracking information + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1642479 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1641914 +Upstream Status: iproute2.git commit 8958a15c040e0 +Conflicts: remove rdma chunks due to missing files + +commit 8958a15c040e05f4f2c6f3946322202fdb875348 +Author: Steve Wise +Date: Thu Mar 29 09:10:41 2018 -0700 + + rdma: Add MR resource tracking information + + Sample output: + + Without CAP_NET_ADMIN: + + $ rdma resource show mr mrlen 65536 + dev mlx4_0 mrlen 65536 pid 0 comm [nvme_rdma] + dev cxgb4_0 mrlen 65536 pid 0 comm [nvme_rdma] + + With CAP_NET_ADMIN: + + # rdma resource show mr mrlen 65536 + dev mlx4_0 rkey 0x12702 lkey 0x12702 iova 0x85724a000 mrlen 65536 pid 0 comm [nvme_rdma] + dev cxgb4_0 rkey 0x68fe4e9 lkey 0x68fe4e9 iova 0x835b91000 mrlen 65536 pid 0 comm [nvme_rdma] + + Signed-off-by: Steve Wise + Reviewed-by: Leon Romanovsky + Signed-off-by: David Ahern +--- + include/json_writer.h | 2 ++ + lib/json_writer.c | 11 +++++++++++ + 2 files changed, 13 insertions(+) + +diff --git a/include/json_writer.h b/include/json_writer.h +index 1516aafba59df..34f2ccc2f5423 100644 +--- a/include/json_writer.h ++++ 
b/include/json_writer.h +@@ -39,6 +39,7 @@ void jsonw_bool(json_writer_t *self, bool value); + void jsonw_float(json_writer_t *self, double number); + void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num); + void jsonw_uint(json_writer_t *self, uint64_t number); ++void jsonw_xint(json_writer_t *self, uint64_t number); + void jsonw_hu(json_writer_t *self, unsigned short number); + void jsonw_int(json_writer_t *self, int64_t number); + void jsonw_null(json_writer_t *self); +@@ -49,6 +50,7 @@ void jsonw_string_field(json_writer_t *self, const char *prop, const char *val); + void jsonw_bool_field(json_writer_t *self, const char *prop, bool value); + void jsonw_float_field(json_writer_t *self, const char *prop, double num); + void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num); ++void jsonw_xint_field(json_writer_t *self, const char *prop, uint64_t num); + void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num); + void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num); + void jsonw_null_field(json_writer_t *self, const char *prop); +diff --git a/lib/json_writer.c b/lib/json_writer.c +index 6b77d288cce2b..6aaa6b4170711 100644 +--- a/lib/json_writer.c ++++ b/lib/json_writer.c +@@ -226,6 +226,11 @@ void jsonw_uint(json_writer_t *self, uint64_t num) + jsonw_printf(self, "%"PRIu64, num); + } + ++void jsonw_xint(json_writer_t *self, uint64_t num) ++{ ++ jsonw_printf(self, "%"PRIx64, num); ++} ++ + void jsonw_lluint(json_writer_t *self, unsigned long long int num) + { + jsonw_printf(self, "%llu", num); +@@ -272,6 +277,12 @@ void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num) + jsonw_uint(self, num); + } + ++void jsonw_xint_field(json_writer_t *self, const char *prop, uint64_t num) ++{ ++ jsonw_name(self, prop); ++ jsonw_xint(self, num); ++} ++ + void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num) + { + jsonw_name(self, prop); +-- +2.20.1 + diff 
--git a/SOURCES/0067-rdma-add-infrastructure-for-RDMA-tool.patch b/SOURCES/0067-rdma-add-infrastructure-for-RDMA-tool.patch new file mode 100644 index 0000000..e30744c --- /dev/null +++ b/SOURCES/0067-rdma-add-infrastructure-for-RDMA-tool.patch @@ -0,0 +1,5287 @@ +From 6ca04b58fcbaeaa5c8848e77ae0cfcf8b5f4c9ab Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 25 Mar 2019 13:31:34 +0100 +Subject: [PATCH] rdma: add infrastructure for RDMA tool + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1642479 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1641914 +Upstream Status: RHEL-only + +commit de87313c8cd0399fd803fcaa8dfaa4aa27912f79 +Author: Andrea Claudi +Date: Thu Mar 21 17:24:12 2019 +0100 + + rdma: add infrastructure for RDMA tool + + Checkout to the v5.0.0 upstream tag. + + Conflicts: + - add rdma on base Makefile + - fix config path and libmnl cflags and libs on rdma/Makefile + + Signed-off-by: Andrea Claudi +--- + Makefile | 2 +- + rdma/.gitignore | 1 + + rdma/Makefile | 27 + + rdma/dev.c | 312 ++++++ + rdma/include/uapi/rdma/ib_user_sa.h | 77 ++ + rdma/include/uapi/rdma/ib_user_verbs.h | 1302 ++++++++++++++++++++++++ + rdma/include/uapi/rdma/rdma_netlink.h | 438 ++++++++ + rdma/include/uapi/rdma/rdma_user_cm.h | 324 ++++++ + rdma/link.c | 355 +++++++ + rdma/rdma.c | 203 ++++ + rdma/rdma.h | 131 +++ + rdma/res.c | 1111 ++++++++++++++++++++ + rdma/utils.c | 868 ++++++++++++++++ + 13 files changed, 5150 insertions(+), 1 deletion(-) + create mode 100644 rdma/.gitignore + create mode 100644 rdma/Makefile + create mode 100644 rdma/dev.c + create mode 100644 rdma/include/uapi/rdma/ib_user_sa.h + create mode 100644 rdma/include/uapi/rdma/ib_user_verbs.h + create mode 100644 rdma/include/uapi/rdma/rdma_netlink.h + create mode 100644 rdma/include/uapi/rdma/rdma_user_cm.h + create mode 100644 rdma/link.c + create mode 100644 rdma/rdma.c + create mode 100644 rdma/rdma.h + create mode 100644 rdma/res.c + create mode 100644 rdma/utils.c + +diff --git 
a/Makefile b/Makefile +index df2fa33630e65..aea12423166cd 100644 +--- a/Makefile ++++ b/Makefile +@@ -52,7 +52,7 @@ WFLAGS += -Wmissing-declarations -Wold-style-definition -Wformat=2 + CFLAGS := $(WFLAGS) $(CCOPTS) -I../include -I../include/uapi $(DEFINES) $(CFLAGS) + YACCFLAGS = -d -t -v + +-SUBDIRS=lib ip tc bridge misc netem genl tipc devlink man ++SUBDIRS=lib ip tc bridge misc netem genl tipc devlink rdma man + + LIBNETLINK=../lib/libnetlink.a ../lib/libutil.a + LDLIBS += $(LIBNETLINK) +diff --git a/rdma/.gitignore b/rdma/.gitignore +new file mode 100644 +index 0000000000000..51fb172baa216 +--- /dev/null ++++ b/rdma/.gitignore +@@ -0,0 +1 @@ ++rdma +diff --git a/rdma/Makefile b/rdma/Makefile +new file mode 100644 +index 0000000000000..0830c82f77edb +--- /dev/null ++++ b/rdma/Makefile +@@ -0,0 +1,27 @@ ++# SPDX-License-Identifier: GPL-2.0 ++include ../Config ++ ++TARGETS := ++ ++ifeq ($(HAVE_MNL),y) ++CFLAGS += -I./include/uapi/ ++CFLAGS += $(shell $(PKG_CONFIG) libmnl --cflags) ++LDLIBS += $(shell $(PKG_CONFIG) libmnl --libs) ++ ++RDMA_OBJ = rdma.o utils.o dev.o link.o res.o ++ ++TARGETS += rdma ++endif ++ ++all: $(TARGETS) $(LIBS) ++ ++rdma: $(RDMA_OBJ) $(LIBS) ++ $(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@ ++ ++install: all ++ for i in $(TARGETS); \ ++ do install -m 0755 $$i $(DESTDIR)$(SBINDIR); \ ++ done ++ ++clean: ++ rm -f $(RDMA_OBJ) $(TARGETS) +diff --git a/rdma/dev.c b/rdma/dev.c +new file mode 100644 +index 0000000000000..60ff4b31e3204 +--- /dev/null ++++ b/rdma/dev.c +@@ -0,0 +1,312 @@ ++/* ++ * dev.c RDMA tool ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. 
++ * ++ * Authors: Leon Romanovsky ++ */ ++ ++#include "rdma.h" ++ ++static int dev_help(struct rd *rd) ++{ ++ pr_out("Usage: %s dev show [DEV]\n", rd->filename); ++ pr_out(" %s dev set [DEV] name DEVNAME\n", rd->filename); ++ return 0; ++} ++ ++static const char *dev_caps_to_str(uint32_t idx) ++{ ++#define RDMA_DEV_FLAGS_LOW(x) \ ++ x(RESIZE_MAX_WR, 0) \ ++ x(BAD_PKEY_CNTR, 1) \ ++ x(BAD_QKEY_CNTR, 2) \ ++ x(RAW_MULTI, 3) \ ++ x(AUTO_PATH_MIG, 4) \ ++ x(CHANGE_PHY_PORT, 5) \ ++ x(UD_AV_PORT_ENFORCE_PORT_ENFORCE, 6) \ ++ x(CURR_QP_STATE_MOD, 7) \ ++ x(SHUTDOWN_PORT, 8) \ ++ x(INIT_TYPE, 9) \ ++ x(PORT_ACTIVE_EVENT, 10) \ ++ x(SYS_IMAGE_GUID, 11) \ ++ x(RC_RNR_NAK_GEN, 12) \ ++ x(SRQ_RESIZE, 13) \ ++ x(N_NOTIFY_CQ, 14) \ ++ x(LOCAL_DMA_LKEY, 15) \ ++ x(MEM_WINDOW, 17) \ ++ x(UD_IP_CSUM, 18) \ ++ x(UD_TSO, 19) \ ++ x(XRC, 20) \ ++ x(MEM_MGT_EXTENSIONS, 21) \ ++ x(BLOCK_MULTICAST_LOOPBACK, 22) \ ++ x(MEM_WINDOW_TYPE_2A, 23) \ ++ x(MEM_WINDOW_TYPE_2B, 24) \ ++ x(RC_IP_CSUM, 25) \ ++ x(RAW_IP_CSUM, 26) \ ++ x(CROSS_CHANNEL, 27) \ ++ x(MANAGED_FLOW_STEERING, 29) \ ++ x(SIGNATURE_HANDOVER, 30) \ ++ x(ON_DEMAND_PAGING, 31) ++ ++#define RDMA_DEV_FLAGS_HIGH(x) \ ++ x(SG_GAPS_REG, 0) \ ++ x(VIRTUAL_FUNCTION, 1) \ ++ x(RAW_SCATTER_FCS, 2) \ ++ x(RDMA_NETDEV_OPA_VNIC, 3) \ ++ x(PCI_WRITE_END_PADDING, 4) ++ ++ /* ++ * Separation below is needed to allow compilation of rdmatool ++ * on 32bits systems. On such systems, C-enum is limited to be ++ * int and can't hold more than 32 bits. 
++ */ ++ enum { RDMA_DEV_FLAGS_LOW(RDMA_BITMAP_ENUM) }; ++ enum { RDMA_DEV_FLAGS_HIGH(RDMA_BITMAP_ENUM) }; ++ ++ static const char * const ++ rdma_dev_names_low[] = { RDMA_DEV_FLAGS_LOW(RDMA_BITMAP_NAMES) }; ++ static const char * const ++ rdma_dev_names_high[] = { RDMA_DEV_FLAGS_HIGH(RDMA_BITMAP_NAMES) }; ++ uint32_t high_idx; ++ #undef RDMA_DEV_FLAGS_LOW ++ #undef RDMA_DEV_FLAGS_HIGH ++ ++ if (idx < ARRAY_SIZE(rdma_dev_names_low) && rdma_dev_names_low[idx]) ++ return rdma_dev_names_low[idx]; ++ ++ high_idx = idx - ARRAY_SIZE(rdma_dev_names_low); ++ if (high_idx < ARRAY_SIZE(rdma_dev_names_high) && ++ rdma_dev_names_high[high_idx]) ++ return rdma_dev_names_high[high_idx]; ++ ++ return "UNKNOWN"; ++} ++ ++static void dev_print_caps(struct rd *rd, struct nlattr **tb) ++{ ++ uint64_t caps; ++ uint32_t idx; ++ ++ if (!tb[RDMA_NLDEV_ATTR_CAP_FLAGS]) ++ return; ++ ++ caps = mnl_attr_get_u64(tb[RDMA_NLDEV_ATTR_CAP_FLAGS]); ++ ++ if (rd->json_output) { ++ jsonw_name(rd->jw, "caps"); ++ jsonw_start_array(rd->jw); ++ } else { ++ pr_out("\n caps: <"); ++ } ++ for (idx = 0; caps; idx++) { ++ if (caps & 0x1) { ++ if (rd->json_output) { ++ jsonw_string(rd->jw, dev_caps_to_str(idx)); ++ } else { ++ pr_out("%s", dev_caps_to_str(idx)); ++ if (caps >> 0x1) ++ pr_out(", "); ++ } ++ } ++ caps >>= 0x1; ++ } ++ ++ if (rd->json_output) ++ jsonw_end_array(rd->jw); ++ else ++ pr_out(">"); ++} ++ ++static void dev_print_fw(struct rd *rd, struct nlattr **tb) ++{ ++ const char *str; ++ if (!tb[RDMA_NLDEV_ATTR_FW_VERSION]) ++ return; ++ ++ str = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_FW_VERSION]); ++ if (rd->json_output) ++ jsonw_string_field(rd->jw, "fw", str); ++ else ++ pr_out("fw %s ", str); ++} ++ ++static void dev_print_node_guid(struct rd *rd, struct nlattr **tb) ++{ ++ uint64_t node_guid; ++ uint16_t vp[4]; ++ char str[32]; ++ ++ if (!tb[RDMA_NLDEV_ATTR_NODE_GUID]) ++ return; ++ ++ node_guid = mnl_attr_get_u64(tb[RDMA_NLDEV_ATTR_NODE_GUID]); ++ memcpy(vp, &node_guid, sizeof(uint64_t)); 
++ snprintf(str, 32, "%04x:%04x:%04x:%04x", vp[3], vp[2], vp[1], vp[0]); ++ if (rd->json_output) ++ jsonw_string_field(rd->jw, "node_guid", str); ++ else ++ pr_out("node_guid %s ", str); ++} ++ ++static void dev_print_sys_image_guid(struct rd *rd, struct nlattr **tb) ++{ ++ uint64_t sys_image_guid; ++ uint16_t vp[4]; ++ char str[32]; ++ ++ if (!tb[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID]) ++ return; ++ ++ sys_image_guid = mnl_attr_get_u64(tb[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID]); ++ memcpy(vp, &sys_image_guid, sizeof(uint64_t)); ++ snprintf(str, 32, "%04x:%04x:%04x:%04x", vp[3], vp[2], vp[1], vp[0]); ++ if (rd->json_output) ++ jsonw_string_field(rd->jw, "sys_image_guid", str); ++ else ++ pr_out("sys_image_guid %s ", str); ++} ++ ++static const char *node_type_to_str(uint8_t node_type) ++{ ++ static const char * const node_type_str[] = { "unknown", "ca", ++ "switch", "router", ++ "rnic", "usnic", ++ "usnic_dp" }; ++ if (node_type < ARRAY_SIZE(node_type_str)) ++ return node_type_str[node_type]; ++ return "unknown"; ++} ++ ++static void dev_print_node_type(struct rd *rd, struct nlattr **tb) ++{ ++ const char *node_str; ++ uint8_t node_type; ++ ++ if (!tb[RDMA_NLDEV_ATTR_DEV_NODE_TYPE]) ++ return; ++ ++ node_type = mnl_attr_get_u8(tb[RDMA_NLDEV_ATTR_DEV_NODE_TYPE]); ++ node_str = node_type_to_str(node_type); ++ if (rd->json_output) ++ jsonw_string_field(rd->jw, "node_type", node_str); ++ else ++ pr_out("node_type %s ", node_str); ++} ++ ++static int dev_parse_cb(const struct nlmsghdr *nlh, void *data) ++{ ++ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {}; ++ struct rd *rd = data; ++ const char *name; ++ uint32_t idx; ++ ++ mnl_attr_parse(nlh, 0, rd_attr_cb, tb); ++ if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_DEV_NAME]) ++ return MNL_CB_ERROR; ++ ++ idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); ++ name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]); ++ if (rd->json_output) { ++ jsonw_uint_field(rd->jw, "ifindex", idx); ++ jsonw_string_field(rd->jw, "ifname", 
name); ++ } else { ++ pr_out("%u: %s: ", idx, name); ++ } ++ ++ dev_print_node_type(rd, tb); ++ dev_print_fw(rd, tb); ++ dev_print_node_guid(rd, tb); ++ dev_print_sys_image_guid(rd, tb); ++ if (rd->show_details) ++ dev_print_caps(rd, tb); ++ ++ if (!rd->json_output) ++ pr_out("\n"); ++ return MNL_CB_OK; ++} ++ ++static int dev_no_args(struct rd *rd) ++{ ++ uint32_t seq; ++ int ret; ++ ++ rd_prepare_msg(rd, RDMA_NLDEV_CMD_GET, ++ &seq, (NLM_F_REQUEST | NLM_F_ACK)); ++ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx); ++ ret = rd_send_msg(rd); ++ if (ret) ++ return ret; ++ ++ if (rd->json_output) ++ jsonw_start_object(rd->jw); ++ ret = rd_recv_msg(rd, dev_parse_cb, rd, seq); ++ if (rd->json_output) ++ jsonw_end_object(rd->jw); ++ return ret; ++} ++ ++static int dev_one_show(struct rd *rd) ++{ ++ const struct rd_cmd cmds[] = { ++ { NULL, dev_no_args}, ++ { 0 } ++ }; ++ ++ return rd_exec_cmd(rd, cmds, "parameter"); ++} ++ ++static int dev_set_name(struct rd *rd) ++{ ++ uint32_t seq; ++ ++ if (rd_no_arg(rd)) { ++ pr_err("Please provide device new name.\n"); ++ return -EINVAL; ++ } ++ ++ rd_prepare_msg(rd, RDMA_NLDEV_CMD_SET, ++ &seq, (NLM_F_REQUEST | NLM_F_ACK)); ++ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx); ++ mnl_attr_put_strz(rd->nlh, RDMA_NLDEV_ATTR_DEV_NAME, rd_argv(rd)); ++ ++ return rd_send_msg(rd); ++} ++ ++static int dev_one_set(struct rd *rd) ++{ ++ const struct rd_cmd cmds[] = { ++ { NULL, dev_help}, ++ { "name", dev_set_name}, ++ { 0 } ++ }; ++ ++ return rd_exec_cmd(rd, cmds, "parameter"); ++} ++ ++static int dev_show(struct rd *rd) ++{ ++ return rd_exec_dev(rd, dev_one_show); ++} ++ ++static int dev_set(struct rd *rd) ++{ ++ return rd_exec_require_dev(rd, dev_one_set); ++} ++ ++int cmd_dev(struct rd *rd) ++{ ++ const struct rd_cmd cmds[] = { ++ { NULL, dev_show }, ++ { "show", dev_show }, ++ { "list", dev_show }, ++ { "set", dev_set }, ++ { "help", dev_help }, ++ { 0 } ++ }; ++ ++ return rd_exec_cmd(rd, cmds, "dev 
command"); ++} +diff --git a/rdma/include/uapi/rdma/ib_user_sa.h b/rdma/include/uapi/rdma/ib_user_sa.h +new file mode 100644 +index 0000000000000..435155d6e1c6a +--- /dev/null ++++ b/rdma/include/uapi/rdma/ib_user_sa.h +@@ -0,0 +1,77 @@ ++/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ ++/* ++ * Copyright (c) 2005 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ */ ++ ++#ifndef IB_USER_SA_H ++#define IB_USER_SA_H ++ ++#include ++ ++enum { ++ IB_PATH_GMP = 1, ++ IB_PATH_PRIMARY = (1<<1), ++ IB_PATH_ALTERNATE = (1<<2), ++ IB_PATH_OUTBOUND = (1<<3), ++ IB_PATH_INBOUND = (1<<4), ++ IB_PATH_INBOUND_REVERSE = (1<<5), ++ IB_PATH_BIDIRECTIONAL = IB_PATH_OUTBOUND | IB_PATH_INBOUND_REVERSE ++}; ++ ++struct ib_path_rec_data { ++ __u32 flags; ++ __u32 reserved; ++ __u32 path_rec[16]; ++}; ++ ++struct ib_user_path_rec { ++ __u8 dgid[16]; ++ __u8 sgid[16]; ++ __be16 dlid; ++ __be16 slid; ++ __u32 raw_traffic; ++ __be32 flow_label; ++ __u32 reversible; ++ __u32 mtu; ++ __be16 pkey; ++ __u8 hop_limit; ++ __u8 traffic_class; ++ __u8 numb_path; ++ __u8 sl; ++ __u8 mtu_selector; ++ __u8 rate_selector; ++ __u8 rate; ++ __u8 packet_life_time_selector; ++ __u8 packet_life_time; ++ __u8 preference; ++}; ++ ++#endif /* IB_USER_SA_H */ +diff --git a/rdma/include/uapi/rdma/ib_user_verbs.h b/rdma/include/uapi/rdma/ib_user_verbs.h +new file mode 100644 +index 0000000000000..480d9a60b68e4 +--- /dev/null ++++ b/rdma/include/uapi/rdma/ib_user_verbs.h +@@ -0,0 +1,1302 @@ ++/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ ++/* ++ * Copyright (c) 2005 Topspin Communications. All rights reserved. ++ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. ++ * Copyright (c) 2005 PathScale, Inc. All rights reserved. ++ * Copyright (c) 2006 Mellanox Technologies. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. 
You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#ifndef IB_USER_VERBS_H ++#define IB_USER_VERBS_H ++ ++#include ++ ++/* ++ * Increment this value if any changes that break userspace ABI ++ * compatibility are made. 
++ */ ++#define IB_USER_VERBS_ABI_VERSION 6 ++#define IB_USER_VERBS_CMD_THRESHOLD 50 ++ ++enum ib_uverbs_write_cmds { ++ IB_USER_VERBS_CMD_GET_CONTEXT, ++ IB_USER_VERBS_CMD_QUERY_DEVICE, ++ IB_USER_VERBS_CMD_QUERY_PORT, ++ IB_USER_VERBS_CMD_ALLOC_PD, ++ IB_USER_VERBS_CMD_DEALLOC_PD, ++ IB_USER_VERBS_CMD_CREATE_AH, ++ IB_USER_VERBS_CMD_MODIFY_AH, ++ IB_USER_VERBS_CMD_QUERY_AH, ++ IB_USER_VERBS_CMD_DESTROY_AH, ++ IB_USER_VERBS_CMD_REG_MR, ++ IB_USER_VERBS_CMD_REG_SMR, ++ IB_USER_VERBS_CMD_REREG_MR, ++ IB_USER_VERBS_CMD_QUERY_MR, ++ IB_USER_VERBS_CMD_DEREG_MR, ++ IB_USER_VERBS_CMD_ALLOC_MW, ++ IB_USER_VERBS_CMD_BIND_MW, ++ IB_USER_VERBS_CMD_DEALLOC_MW, ++ IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL, ++ IB_USER_VERBS_CMD_CREATE_CQ, ++ IB_USER_VERBS_CMD_RESIZE_CQ, ++ IB_USER_VERBS_CMD_DESTROY_CQ, ++ IB_USER_VERBS_CMD_POLL_CQ, ++ IB_USER_VERBS_CMD_PEEK_CQ, ++ IB_USER_VERBS_CMD_REQ_NOTIFY_CQ, ++ IB_USER_VERBS_CMD_CREATE_QP, ++ IB_USER_VERBS_CMD_QUERY_QP, ++ IB_USER_VERBS_CMD_MODIFY_QP, ++ IB_USER_VERBS_CMD_DESTROY_QP, ++ IB_USER_VERBS_CMD_POST_SEND, ++ IB_USER_VERBS_CMD_POST_RECV, ++ IB_USER_VERBS_CMD_ATTACH_MCAST, ++ IB_USER_VERBS_CMD_DETACH_MCAST, ++ IB_USER_VERBS_CMD_CREATE_SRQ, ++ IB_USER_VERBS_CMD_MODIFY_SRQ, ++ IB_USER_VERBS_CMD_QUERY_SRQ, ++ IB_USER_VERBS_CMD_DESTROY_SRQ, ++ IB_USER_VERBS_CMD_POST_SRQ_RECV, ++ IB_USER_VERBS_CMD_OPEN_XRCD, ++ IB_USER_VERBS_CMD_CLOSE_XRCD, ++ IB_USER_VERBS_CMD_CREATE_XSRQ, ++ IB_USER_VERBS_CMD_OPEN_QP, ++}; ++ ++enum { ++ IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE, ++ IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ, ++ IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP, ++ IB_USER_VERBS_EX_CMD_MODIFY_QP = IB_USER_VERBS_CMD_MODIFY_QP, ++ IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, ++ IB_USER_VERBS_EX_CMD_DESTROY_FLOW, ++ IB_USER_VERBS_EX_CMD_CREATE_WQ, ++ IB_USER_VERBS_EX_CMD_MODIFY_WQ, ++ IB_USER_VERBS_EX_CMD_DESTROY_WQ, ++ IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL, ++ 
IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL, ++ IB_USER_VERBS_EX_CMD_MODIFY_CQ ++}; ++ ++/* ++ * Make sure that all structs defined in this file remain laid out so ++ * that they pack the same way on 32-bit and 64-bit architectures (to ++ * avoid incompatibility between 32-bit userspace and 64-bit kernels). ++ * Specifically: ++ * - Do not use pointer types -- pass pointers in __u64 instead. ++ * - Make sure that any structure larger than 4 bytes is padded to a ++ * multiple of 8 bytes. Otherwise the structure size will be ++ * different between 32-bit and 64-bit architectures. ++ */ ++ ++struct ib_uverbs_async_event_desc { ++ __aligned_u64 element; ++ __u32 event_type; /* enum ib_event_type */ ++ __u32 reserved; ++}; ++ ++struct ib_uverbs_comp_event_desc { ++ __aligned_u64 cq_handle; ++}; ++ ++struct ib_uverbs_cq_moderation_caps { ++ __u16 max_cq_moderation_count; ++ __u16 max_cq_moderation_period; ++ __u32 reserved; ++}; ++ ++/* ++ * All commands from userspace should start with a __u32 command field ++ * followed by __u16 in_words and out_words fields (which give the ++ * length of the command block and response buffer if any in 32-bit ++ * words). The kernel driver will read these fields first and read ++ * the rest of the command struct based on these value. 
++ */ ++ ++#define IB_USER_VERBS_CMD_COMMAND_MASK 0xff ++#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80000000u ++ ++struct ib_uverbs_cmd_hdr { ++ __u32 command; ++ __u16 in_words; ++ __u16 out_words; ++}; ++ ++struct ib_uverbs_ex_cmd_hdr { ++ __aligned_u64 response; ++ __u16 provider_in_words; ++ __u16 provider_out_words; ++ __u32 cmd_hdr_reserved; ++}; ++ ++struct ib_uverbs_get_context { ++ __aligned_u64 response; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_get_context_resp { ++ __u32 async_fd; ++ __u32 num_comp_vectors; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_query_device { ++ __aligned_u64 response; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_query_device_resp { ++ __aligned_u64 fw_ver; ++ __be64 node_guid; ++ __be64 sys_image_guid; ++ __aligned_u64 max_mr_size; ++ __aligned_u64 page_size_cap; ++ __u32 vendor_id; ++ __u32 vendor_part_id; ++ __u32 hw_ver; ++ __u32 max_qp; ++ __u32 max_qp_wr; ++ __u32 device_cap_flags; ++ __u32 max_sge; ++ __u32 max_sge_rd; ++ __u32 max_cq; ++ __u32 max_cqe; ++ __u32 max_mr; ++ __u32 max_pd; ++ __u32 max_qp_rd_atom; ++ __u32 max_ee_rd_atom; ++ __u32 max_res_rd_atom; ++ __u32 max_qp_init_rd_atom; ++ __u32 max_ee_init_rd_atom; ++ __u32 atomic_cap; ++ __u32 max_ee; ++ __u32 max_rdd; ++ __u32 max_mw; ++ __u32 max_raw_ipv6_qp; ++ __u32 max_raw_ethy_qp; ++ __u32 max_mcast_grp; ++ __u32 max_mcast_qp_attach; ++ __u32 max_total_mcast_qp_attach; ++ __u32 max_ah; ++ __u32 max_fmr; ++ __u32 max_map_per_fmr; ++ __u32 max_srq; ++ __u32 max_srq_wr; ++ __u32 max_srq_sge; ++ __u16 max_pkeys; ++ __u8 local_ca_ack_delay; ++ __u8 phys_port_cnt; ++ __u8 reserved[4]; ++}; ++ ++struct ib_uverbs_ex_query_device { ++ __u32 comp_mask; ++ __u32 reserved; ++}; ++ ++struct ib_uverbs_odp_caps { ++ __aligned_u64 general_caps; ++ struct { ++ __u32 rc_odp_caps; ++ __u32 uc_odp_caps; ++ __u32 ud_odp_caps; ++ } per_transport_caps; ++ __u32 reserved; ++}; ++ ++struct ib_uverbs_rss_caps { ++ /* Corresponding bit will 
be set if qp type from ++ * 'enum ib_qp_type' is supported, e.g. ++ * supported_qpts |= 1 << IB_QPT_UD ++ */ ++ __u32 supported_qpts; ++ __u32 max_rwq_indirection_tables; ++ __u32 max_rwq_indirection_table_size; ++ __u32 reserved; ++}; ++ ++struct ib_uverbs_tm_caps { ++ /* Max size of rendezvous request message */ ++ __u32 max_rndv_hdr_size; ++ /* Max number of entries in tag matching list */ ++ __u32 max_num_tags; ++ /* TM flags */ ++ __u32 flags; ++ /* Max number of outstanding list operations */ ++ __u32 max_ops; ++ /* Max number of SGE in tag matching entry */ ++ __u32 max_sge; ++ __u32 reserved; ++}; ++ ++struct ib_uverbs_ex_query_device_resp { ++ struct ib_uverbs_query_device_resp base; ++ __u32 comp_mask; ++ __u32 response_length; ++ struct ib_uverbs_odp_caps odp_caps; ++ __aligned_u64 timestamp_mask; ++ __aligned_u64 hca_core_clock; /* in KHZ */ ++ __aligned_u64 device_cap_flags_ex; ++ struct ib_uverbs_rss_caps rss_caps; ++ __u32 max_wq_type_rq; ++ __u32 raw_packet_caps; ++ struct ib_uverbs_tm_caps tm_caps; ++ struct ib_uverbs_cq_moderation_caps cq_moderation_caps; ++ __aligned_u64 max_dm_size; ++}; ++ ++struct ib_uverbs_query_port { ++ __aligned_u64 response; ++ __u8 port_num; ++ __u8 reserved[7]; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_query_port_resp { ++ __u32 port_cap_flags; /* see ib_uverbs_query_port_cap_flags */ ++ __u32 max_msg_sz; ++ __u32 bad_pkey_cntr; ++ __u32 qkey_viol_cntr; ++ __u32 gid_tbl_len; ++ __u16 pkey_tbl_len; ++ __u16 lid; ++ __u16 sm_lid; ++ __u8 state; ++ __u8 max_mtu; ++ __u8 active_mtu; ++ __u8 lmc; ++ __u8 max_vl_num; ++ __u8 sm_sl; ++ __u8 subnet_timeout; ++ __u8 init_type_reply; ++ __u8 active_width; ++ __u8 active_speed; ++ __u8 phys_state; ++ __u8 link_layer; ++ __u8 flags; /* see ib_uverbs_query_port_flags */ ++ __u8 reserved; ++}; ++ ++struct ib_uverbs_alloc_pd { ++ __aligned_u64 response; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_alloc_pd_resp { ++ __u32 pd_handle; ++ __u32 
driver_data[0]; ++}; ++ ++struct ib_uverbs_dealloc_pd { ++ __u32 pd_handle; ++}; ++ ++struct ib_uverbs_open_xrcd { ++ __aligned_u64 response; ++ __u32 fd; ++ __u32 oflags; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_open_xrcd_resp { ++ __u32 xrcd_handle; ++ __u32 driver_data[0]; ++}; ++ ++struct ib_uverbs_close_xrcd { ++ __u32 xrcd_handle; ++}; ++ ++struct ib_uverbs_reg_mr { ++ __aligned_u64 response; ++ __aligned_u64 start; ++ __aligned_u64 length; ++ __aligned_u64 hca_va; ++ __u32 pd_handle; ++ __u32 access_flags; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_reg_mr_resp { ++ __u32 mr_handle; ++ __u32 lkey; ++ __u32 rkey; ++ __u32 driver_data[0]; ++}; ++ ++struct ib_uverbs_rereg_mr { ++ __aligned_u64 response; ++ __u32 mr_handle; ++ __u32 flags; ++ __aligned_u64 start; ++ __aligned_u64 length; ++ __aligned_u64 hca_va; ++ __u32 pd_handle; ++ __u32 access_flags; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_rereg_mr_resp { ++ __u32 lkey; ++ __u32 rkey; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_dereg_mr { ++ __u32 mr_handle; ++}; ++ ++struct ib_uverbs_alloc_mw { ++ __aligned_u64 response; ++ __u32 pd_handle; ++ __u8 mw_type; ++ __u8 reserved[3]; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_alloc_mw_resp { ++ __u32 mw_handle; ++ __u32 rkey; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_dealloc_mw { ++ __u32 mw_handle; ++}; ++ ++struct ib_uverbs_create_comp_channel { ++ __aligned_u64 response; ++}; ++ ++struct ib_uverbs_create_comp_channel_resp { ++ __u32 fd; ++}; ++ ++struct ib_uverbs_create_cq { ++ __aligned_u64 response; ++ __aligned_u64 user_handle; ++ __u32 cqe; ++ __u32 comp_vector; ++ __s32 comp_channel; ++ __u32 reserved; ++ __aligned_u64 driver_data[0]; ++}; ++ ++enum ib_uverbs_ex_create_cq_flags { ++ IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0, ++ IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1, ++}; ++ ++struct ib_uverbs_ex_create_cq { ++ __aligned_u64 user_handle; ++ 
__u32 cqe; ++ __u32 comp_vector; ++ __s32 comp_channel; ++ __u32 comp_mask; ++ __u32 flags; /* bitmask of ib_uverbs_ex_create_cq_flags */ ++ __u32 reserved; ++}; ++ ++struct ib_uverbs_create_cq_resp { ++ __u32 cq_handle; ++ __u32 cqe; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_ex_create_cq_resp { ++ struct ib_uverbs_create_cq_resp base; ++ __u32 comp_mask; ++ __u32 response_length; ++}; ++ ++struct ib_uverbs_resize_cq { ++ __aligned_u64 response; ++ __u32 cq_handle; ++ __u32 cqe; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_resize_cq_resp { ++ __u32 cqe; ++ __u32 reserved; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_poll_cq { ++ __aligned_u64 response; ++ __u32 cq_handle; ++ __u32 ne; ++}; ++ ++struct ib_uverbs_wc { ++ __aligned_u64 wr_id; ++ __u32 status; ++ __u32 opcode; ++ __u32 vendor_err; ++ __u32 byte_len; ++ union { ++ __be32 imm_data; ++ __u32 invalidate_rkey; ++ } ex; ++ __u32 qp_num; ++ __u32 src_qp; ++ __u32 wc_flags; ++ __u16 pkey_index; ++ __u16 slid; ++ __u8 sl; ++ __u8 dlid_path_bits; ++ __u8 port_num; ++ __u8 reserved; ++}; ++ ++struct ib_uverbs_poll_cq_resp { ++ __u32 count; ++ __u32 reserved; ++ struct ib_uverbs_wc wc[0]; ++}; ++ ++struct ib_uverbs_req_notify_cq { ++ __u32 cq_handle; ++ __u32 solicited_only; ++}; ++ ++struct ib_uverbs_destroy_cq { ++ __aligned_u64 response; ++ __u32 cq_handle; ++ __u32 reserved; ++}; ++ ++struct ib_uverbs_destroy_cq_resp { ++ __u32 comp_events_reported; ++ __u32 async_events_reported; ++}; ++ ++struct ib_uverbs_global_route { ++ __u8 dgid[16]; ++ __u32 flow_label; ++ __u8 sgid_index; ++ __u8 hop_limit; ++ __u8 traffic_class; ++ __u8 reserved; ++}; ++ ++struct ib_uverbs_ah_attr { ++ struct ib_uverbs_global_route grh; ++ __u16 dlid; ++ __u8 sl; ++ __u8 src_path_bits; ++ __u8 static_rate; ++ __u8 is_global; ++ __u8 port_num; ++ __u8 reserved; ++}; ++ ++struct ib_uverbs_qp_attr { ++ __u32 qp_attr_mask; ++ __u32 qp_state; ++ __u32 cur_qp_state; ++ __u32 path_mtu; ++ __u32 
path_mig_state; ++ __u32 qkey; ++ __u32 rq_psn; ++ __u32 sq_psn; ++ __u32 dest_qp_num; ++ __u32 qp_access_flags; ++ ++ struct ib_uverbs_ah_attr ah_attr; ++ struct ib_uverbs_ah_attr alt_ah_attr; ++ ++ /* ib_qp_cap */ ++ __u32 max_send_wr; ++ __u32 max_recv_wr; ++ __u32 max_send_sge; ++ __u32 max_recv_sge; ++ __u32 max_inline_data; ++ ++ __u16 pkey_index; ++ __u16 alt_pkey_index; ++ __u8 en_sqd_async_notify; ++ __u8 sq_draining; ++ __u8 max_rd_atomic; ++ __u8 max_dest_rd_atomic; ++ __u8 min_rnr_timer; ++ __u8 port_num; ++ __u8 timeout; ++ __u8 retry_cnt; ++ __u8 rnr_retry; ++ __u8 alt_port_num; ++ __u8 alt_timeout; ++ __u8 reserved[5]; ++}; ++ ++struct ib_uverbs_create_qp { ++ __aligned_u64 response; ++ __aligned_u64 user_handle; ++ __u32 pd_handle; ++ __u32 send_cq_handle; ++ __u32 recv_cq_handle; ++ __u32 srq_handle; ++ __u32 max_send_wr; ++ __u32 max_recv_wr; ++ __u32 max_send_sge; ++ __u32 max_recv_sge; ++ __u32 max_inline_data; ++ __u8 sq_sig_all; ++ __u8 qp_type; ++ __u8 is_srq; ++ __u8 reserved; ++ __aligned_u64 driver_data[0]; ++}; ++ ++enum ib_uverbs_create_qp_mask { ++ IB_UVERBS_CREATE_QP_MASK_IND_TABLE = 1UL << 0, ++}; ++ ++enum { ++ IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE, ++}; ++ ++enum { ++ /* ++ * This value is equal to IB_QP_DEST_QPN. ++ */ ++ IB_USER_LEGACY_LAST_QP_ATTR_MASK = 1ULL << 20, ++}; ++ ++enum { ++ /* ++ * This value is equal to IB_QP_RATE_LIMIT. 
++ */ ++ IB_USER_LAST_QP_ATTR_MASK = 1ULL << 25, ++}; ++ ++struct ib_uverbs_ex_create_qp { ++ __aligned_u64 user_handle; ++ __u32 pd_handle; ++ __u32 send_cq_handle; ++ __u32 recv_cq_handle; ++ __u32 srq_handle; ++ __u32 max_send_wr; ++ __u32 max_recv_wr; ++ __u32 max_send_sge; ++ __u32 max_recv_sge; ++ __u32 max_inline_data; ++ __u8 sq_sig_all; ++ __u8 qp_type; ++ __u8 is_srq; ++ __u8 reserved; ++ __u32 comp_mask; ++ __u32 create_flags; ++ __u32 rwq_ind_tbl_handle; ++ __u32 source_qpn; ++}; ++ ++struct ib_uverbs_open_qp { ++ __aligned_u64 response; ++ __aligned_u64 user_handle; ++ __u32 pd_handle; ++ __u32 qpn; ++ __u8 qp_type; ++ __u8 reserved[7]; ++ __aligned_u64 driver_data[0]; ++}; ++ ++/* also used for open response */ ++struct ib_uverbs_create_qp_resp { ++ __u32 qp_handle; ++ __u32 qpn; ++ __u32 max_send_wr; ++ __u32 max_recv_wr; ++ __u32 max_send_sge; ++ __u32 max_recv_sge; ++ __u32 max_inline_data; ++ __u32 reserved; ++ __u32 driver_data[0]; ++}; ++ ++struct ib_uverbs_ex_create_qp_resp { ++ struct ib_uverbs_create_qp_resp base; ++ __u32 comp_mask; ++ __u32 response_length; ++}; ++ ++/* ++ * This struct needs to remain a multiple of 8 bytes to keep the ++ * alignment of the modify QP parameters. 
++ */ ++struct ib_uverbs_qp_dest { ++ __u8 dgid[16]; ++ __u32 flow_label; ++ __u16 dlid; ++ __u16 reserved; ++ __u8 sgid_index; ++ __u8 hop_limit; ++ __u8 traffic_class; ++ __u8 sl; ++ __u8 src_path_bits; ++ __u8 static_rate; ++ __u8 is_global; ++ __u8 port_num; ++}; ++ ++struct ib_uverbs_query_qp { ++ __aligned_u64 response; ++ __u32 qp_handle; ++ __u32 attr_mask; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_query_qp_resp { ++ struct ib_uverbs_qp_dest dest; ++ struct ib_uverbs_qp_dest alt_dest; ++ __u32 max_send_wr; ++ __u32 max_recv_wr; ++ __u32 max_send_sge; ++ __u32 max_recv_sge; ++ __u32 max_inline_data; ++ __u32 qkey; ++ __u32 rq_psn; ++ __u32 sq_psn; ++ __u32 dest_qp_num; ++ __u32 qp_access_flags; ++ __u16 pkey_index; ++ __u16 alt_pkey_index; ++ __u8 qp_state; ++ __u8 cur_qp_state; ++ __u8 path_mtu; ++ __u8 path_mig_state; ++ __u8 sq_draining; ++ __u8 max_rd_atomic; ++ __u8 max_dest_rd_atomic; ++ __u8 min_rnr_timer; ++ __u8 port_num; ++ __u8 timeout; ++ __u8 retry_cnt; ++ __u8 rnr_retry; ++ __u8 alt_port_num; ++ __u8 alt_timeout; ++ __u8 sq_sig_all; ++ __u8 reserved[5]; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_modify_qp { ++ struct ib_uverbs_qp_dest dest; ++ struct ib_uverbs_qp_dest alt_dest; ++ __u32 qp_handle; ++ __u32 attr_mask; ++ __u32 qkey; ++ __u32 rq_psn; ++ __u32 sq_psn; ++ __u32 dest_qp_num; ++ __u32 qp_access_flags; ++ __u16 pkey_index; ++ __u16 alt_pkey_index; ++ __u8 qp_state; ++ __u8 cur_qp_state; ++ __u8 path_mtu; ++ __u8 path_mig_state; ++ __u8 en_sqd_async_notify; ++ __u8 max_rd_atomic; ++ __u8 max_dest_rd_atomic; ++ __u8 min_rnr_timer; ++ __u8 port_num; ++ __u8 timeout; ++ __u8 retry_cnt; ++ __u8 rnr_retry; ++ __u8 alt_port_num; ++ __u8 alt_timeout; ++ __u8 reserved[2]; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_ex_modify_qp { ++ struct ib_uverbs_modify_qp base; ++ __u32 rate_limit; ++ __u32 reserved; ++}; ++ ++struct ib_uverbs_ex_modify_qp_resp { ++ __u32 comp_mask; ++ __u32 
response_length; ++}; ++ ++struct ib_uverbs_destroy_qp { ++ __aligned_u64 response; ++ __u32 qp_handle; ++ __u32 reserved; ++}; ++ ++struct ib_uverbs_destroy_qp_resp { ++ __u32 events_reported; ++}; ++ ++/* ++ * The ib_uverbs_sge structure isn't used anywhere, since we assume ++ * the ib_sge structure is packed the same way on 32-bit and 64-bit ++ * architectures in both kernel and user space. It's just here to ++ * document the ABI. ++ */ ++struct ib_uverbs_sge { ++ __aligned_u64 addr; ++ __u32 length; ++ __u32 lkey; ++}; ++ ++enum ib_uverbs_wr_opcode { ++ IB_UVERBS_WR_RDMA_WRITE = 0, ++ IB_UVERBS_WR_RDMA_WRITE_WITH_IMM = 1, ++ IB_UVERBS_WR_SEND = 2, ++ IB_UVERBS_WR_SEND_WITH_IMM = 3, ++ IB_UVERBS_WR_RDMA_READ = 4, ++ IB_UVERBS_WR_ATOMIC_CMP_AND_SWP = 5, ++ IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD = 6, ++ IB_UVERBS_WR_LOCAL_INV = 7, ++ IB_UVERBS_WR_BIND_MW = 8, ++ IB_UVERBS_WR_SEND_WITH_INV = 9, ++ IB_UVERBS_WR_TSO = 10, ++ IB_UVERBS_WR_RDMA_READ_WITH_INV = 11, ++ IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP = 12, ++ IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD = 13, ++ /* Review enum ib_wr_opcode before modifying this */ ++}; ++ ++struct ib_uverbs_send_wr { ++ __aligned_u64 wr_id; ++ __u32 num_sge; ++ __u32 opcode; /* see enum ib_uverbs_wr_opcode */ ++ __u32 send_flags; ++ union { ++ __be32 imm_data; ++ __u32 invalidate_rkey; ++ } ex; ++ union { ++ struct { ++ __aligned_u64 remote_addr; ++ __u32 rkey; ++ __u32 reserved; ++ } rdma; ++ struct { ++ __aligned_u64 remote_addr; ++ __aligned_u64 compare_add; ++ __aligned_u64 swap; ++ __u32 rkey; ++ __u32 reserved; ++ } atomic; ++ struct { ++ __u32 ah; ++ __u32 remote_qpn; ++ __u32 remote_qkey; ++ __u32 reserved; ++ } ud; ++ } wr; ++}; ++ ++struct ib_uverbs_post_send { ++ __aligned_u64 response; ++ __u32 qp_handle; ++ __u32 wr_count; ++ __u32 sge_count; ++ __u32 wqe_size; ++ struct ib_uverbs_send_wr send_wr[0]; ++}; ++ ++struct ib_uverbs_post_send_resp { ++ __u32 bad_wr; ++}; ++ ++struct ib_uverbs_recv_wr { ++ __aligned_u64 wr_id; ++ 
__u32 num_sge; ++ __u32 reserved; ++}; ++ ++struct ib_uverbs_post_recv { ++ __aligned_u64 response; ++ __u32 qp_handle; ++ __u32 wr_count; ++ __u32 sge_count; ++ __u32 wqe_size; ++ struct ib_uverbs_recv_wr recv_wr[0]; ++}; ++ ++struct ib_uverbs_post_recv_resp { ++ __u32 bad_wr; ++}; ++ ++struct ib_uverbs_post_srq_recv { ++ __aligned_u64 response; ++ __u32 srq_handle; ++ __u32 wr_count; ++ __u32 sge_count; ++ __u32 wqe_size; ++ struct ib_uverbs_recv_wr recv[0]; ++}; ++ ++struct ib_uverbs_post_srq_recv_resp { ++ __u32 bad_wr; ++}; ++ ++struct ib_uverbs_create_ah { ++ __aligned_u64 response; ++ __aligned_u64 user_handle; ++ __u32 pd_handle; ++ __u32 reserved; ++ struct ib_uverbs_ah_attr attr; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_create_ah_resp { ++ __u32 ah_handle; ++ __u32 driver_data[0]; ++}; ++ ++struct ib_uverbs_destroy_ah { ++ __u32 ah_handle; ++}; ++ ++struct ib_uverbs_attach_mcast { ++ __u8 gid[16]; ++ __u32 qp_handle; ++ __u16 mlid; ++ __u16 reserved; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_detach_mcast { ++ __u8 gid[16]; ++ __u32 qp_handle; ++ __u16 mlid; ++ __u16 reserved; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_flow_spec_hdr { ++ __u32 type; ++ __u16 size; ++ __u16 reserved; ++ /* followed by flow_spec */ ++ __aligned_u64 flow_spec_data[0]; ++}; ++ ++struct ib_uverbs_flow_eth_filter { ++ __u8 dst_mac[6]; ++ __u8 src_mac[6]; ++ __be16 ether_type; ++ __be16 vlan_tag; ++}; ++ ++struct ib_uverbs_flow_spec_eth { ++ union { ++ struct ib_uverbs_flow_spec_hdr hdr; ++ struct { ++ __u32 type; ++ __u16 size; ++ __u16 reserved; ++ }; ++ }; ++ struct ib_uverbs_flow_eth_filter val; ++ struct ib_uverbs_flow_eth_filter mask; ++}; ++ ++struct ib_uverbs_flow_ipv4_filter { ++ __be32 src_ip; ++ __be32 dst_ip; ++ __u8 proto; ++ __u8 tos; ++ __u8 ttl; ++ __u8 flags; ++}; ++ ++struct ib_uverbs_flow_spec_ipv4 { ++ union { ++ struct ib_uverbs_flow_spec_hdr hdr; ++ struct { ++ __u32 type; ++ __u16 size; ++ __u16 
reserved; ++ }; ++ }; ++ struct ib_uverbs_flow_ipv4_filter val; ++ struct ib_uverbs_flow_ipv4_filter mask; ++}; ++ ++struct ib_uverbs_flow_tcp_udp_filter { ++ __be16 dst_port; ++ __be16 src_port; ++}; ++ ++struct ib_uverbs_flow_spec_tcp_udp { ++ union { ++ struct ib_uverbs_flow_spec_hdr hdr; ++ struct { ++ __u32 type; ++ __u16 size; ++ __u16 reserved; ++ }; ++ }; ++ struct ib_uverbs_flow_tcp_udp_filter val; ++ struct ib_uverbs_flow_tcp_udp_filter mask; ++}; ++ ++struct ib_uverbs_flow_ipv6_filter { ++ __u8 src_ip[16]; ++ __u8 dst_ip[16]; ++ __be32 flow_label; ++ __u8 next_hdr; ++ __u8 traffic_class; ++ __u8 hop_limit; ++ __u8 reserved; ++}; ++ ++struct ib_uverbs_flow_spec_ipv6 { ++ union { ++ struct ib_uverbs_flow_spec_hdr hdr; ++ struct { ++ __u32 type; ++ __u16 size; ++ __u16 reserved; ++ }; ++ }; ++ struct ib_uverbs_flow_ipv6_filter val; ++ struct ib_uverbs_flow_ipv6_filter mask; ++}; ++ ++struct ib_uverbs_flow_spec_action_tag { ++ union { ++ struct ib_uverbs_flow_spec_hdr hdr; ++ struct { ++ __u32 type; ++ __u16 size; ++ __u16 reserved; ++ }; ++ }; ++ __u32 tag_id; ++ __u32 reserved1; ++}; ++ ++struct ib_uverbs_flow_spec_action_drop { ++ union { ++ struct ib_uverbs_flow_spec_hdr hdr; ++ struct { ++ __u32 type; ++ __u16 size; ++ __u16 reserved; ++ }; ++ }; ++}; ++ ++struct ib_uverbs_flow_spec_action_handle { ++ union { ++ struct ib_uverbs_flow_spec_hdr hdr; ++ struct { ++ __u32 type; ++ __u16 size; ++ __u16 reserved; ++ }; ++ }; ++ __u32 handle; ++ __u32 reserved1; ++}; ++ ++struct ib_uverbs_flow_spec_action_count { ++ union { ++ struct ib_uverbs_flow_spec_hdr hdr; ++ struct { ++ __u32 type; ++ __u16 size; ++ __u16 reserved; ++ }; ++ }; ++ __u32 handle; ++ __u32 reserved1; ++}; ++ ++struct ib_uverbs_flow_tunnel_filter { ++ __be32 tunnel_id; ++}; ++ ++struct ib_uverbs_flow_spec_tunnel { ++ union { ++ struct ib_uverbs_flow_spec_hdr hdr; ++ struct { ++ __u32 type; ++ __u16 size; ++ __u16 reserved; ++ }; ++ }; ++ struct ib_uverbs_flow_tunnel_filter val; ++ struct 
ib_uverbs_flow_tunnel_filter mask; ++}; ++ ++struct ib_uverbs_flow_spec_esp_filter { ++ __u32 spi; ++ __u32 seq; ++}; ++ ++struct ib_uverbs_flow_spec_esp { ++ union { ++ struct ib_uverbs_flow_spec_hdr hdr; ++ struct { ++ __u32 type; ++ __u16 size; ++ __u16 reserved; ++ }; ++ }; ++ struct ib_uverbs_flow_spec_esp_filter val; ++ struct ib_uverbs_flow_spec_esp_filter mask; ++}; ++ ++struct ib_uverbs_flow_gre_filter { ++ /* c_ks_res0_ver field is bits 0-15 in offset 0 of a standard GRE header: ++ * bit 0 - C - checksum bit. ++ * bit 1 - reserved. set to 0. ++ * bit 2 - key bit. ++ * bit 3 - sequence number bit. ++ * bits 4:12 - reserved. set to 0. ++ * bits 13:15 - GRE version. ++ */ ++ __be16 c_ks_res0_ver; ++ __be16 protocol; ++ __be32 key; ++}; ++ ++struct ib_uverbs_flow_spec_gre { ++ union { ++ struct ib_uverbs_flow_spec_hdr hdr; ++ struct { ++ __u32 type; ++ __u16 size; ++ __u16 reserved; ++ }; ++ }; ++ struct ib_uverbs_flow_gre_filter val; ++ struct ib_uverbs_flow_gre_filter mask; ++}; ++ ++struct ib_uverbs_flow_mpls_filter { ++ /* The field includes the entire MPLS label: ++ * bits 0:19 - label field. ++ * bits 20:22 - traffic class field. ++ * bits 23 - bottom of stack bit. ++ * bits 24:31 - ttl field. 
++ */ ++ __be32 label; ++}; ++ ++struct ib_uverbs_flow_spec_mpls { ++ union { ++ struct ib_uverbs_flow_spec_hdr hdr; ++ struct { ++ __u32 type; ++ __u16 size; ++ __u16 reserved; ++ }; ++ }; ++ struct ib_uverbs_flow_mpls_filter val; ++ struct ib_uverbs_flow_mpls_filter mask; ++}; ++ ++struct ib_uverbs_flow_attr { ++ __u32 type; ++ __u16 size; ++ __u16 priority; ++ __u8 num_of_specs; ++ __u8 reserved[2]; ++ __u8 port; ++ __u32 flags; ++ /* Following are the optional layers according to user request ++ * struct ib_flow_spec_xxx ++ * struct ib_flow_spec_yyy ++ */ ++ struct ib_uverbs_flow_spec_hdr flow_specs[0]; ++}; ++ ++struct ib_uverbs_create_flow { ++ __u32 comp_mask; ++ __u32 qp_handle; ++ struct ib_uverbs_flow_attr flow_attr; ++}; ++ ++struct ib_uverbs_create_flow_resp { ++ __u32 comp_mask; ++ __u32 flow_handle; ++}; ++ ++struct ib_uverbs_destroy_flow { ++ __u32 comp_mask; ++ __u32 flow_handle; ++}; ++ ++struct ib_uverbs_create_srq { ++ __aligned_u64 response; ++ __aligned_u64 user_handle; ++ __u32 pd_handle; ++ __u32 max_wr; ++ __u32 max_sge; ++ __u32 srq_limit; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_create_xsrq { ++ __aligned_u64 response; ++ __aligned_u64 user_handle; ++ __u32 srq_type; ++ __u32 pd_handle; ++ __u32 max_wr; ++ __u32 max_sge; ++ __u32 srq_limit; ++ __u32 max_num_tags; ++ __u32 xrcd_handle; ++ __u32 cq_handle; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_create_srq_resp { ++ __u32 srq_handle; ++ __u32 max_wr; ++ __u32 max_sge; ++ __u32 srqn; ++ __u32 driver_data[0]; ++}; ++ ++struct ib_uverbs_modify_srq { ++ __u32 srq_handle; ++ __u32 attr_mask; ++ __u32 max_wr; ++ __u32 srq_limit; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_query_srq { ++ __aligned_u64 response; ++ __u32 srq_handle; ++ __u32 reserved; ++ __aligned_u64 driver_data[0]; ++}; ++ ++struct ib_uverbs_query_srq_resp { ++ __u32 max_wr; ++ __u32 max_sge; ++ __u32 srq_limit; ++ __u32 reserved; ++}; ++ ++struct ib_uverbs_destroy_srq { ++ 
__aligned_u64 response; ++ __u32 srq_handle; ++ __u32 reserved; ++}; ++ ++struct ib_uverbs_destroy_srq_resp { ++ __u32 events_reported; ++}; ++ ++struct ib_uverbs_ex_create_wq { ++ __u32 comp_mask; ++ __u32 wq_type; ++ __aligned_u64 user_handle; ++ __u32 pd_handle; ++ __u32 cq_handle; ++ __u32 max_wr; ++ __u32 max_sge; ++ __u32 create_flags; /* Use enum ib_wq_flags */ ++ __u32 reserved; ++}; ++ ++struct ib_uverbs_ex_create_wq_resp { ++ __u32 comp_mask; ++ __u32 response_length; ++ __u32 wq_handle; ++ __u32 max_wr; ++ __u32 max_sge; ++ __u32 wqn; ++}; ++ ++struct ib_uverbs_ex_destroy_wq { ++ __u32 comp_mask; ++ __u32 wq_handle; ++}; ++ ++struct ib_uverbs_ex_destroy_wq_resp { ++ __u32 comp_mask; ++ __u32 response_length; ++ __u32 events_reported; ++ __u32 reserved; ++}; ++ ++struct ib_uverbs_ex_modify_wq { ++ __u32 attr_mask; ++ __u32 wq_handle; ++ __u32 wq_state; ++ __u32 curr_wq_state; ++ __u32 flags; /* Use enum ib_wq_flags */ ++ __u32 flags_mask; /* Use enum ib_wq_flags */ ++}; ++ ++/* Prevent memory allocation rather than max expected size */ ++#define IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE 0x0d ++struct ib_uverbs_ex_create_rwq_ind_table { ++ __u32 comp_mask; ++ __u32 log_ind_tbl_size; ++ /* Following are the wq handles according to log_ind_tbl_size ++ * wq_handle1 ++ * wq_handle2 ++ */ ++ __u32 wq_handles[0]; ++}; ++ ++struct ib_uverbs_ex_create_rwq_ind_table_resp { ++ __u32 comp_mask; ++ __u32 response_length; ++ __u32 ind_tbl_handle; ++ __u32 ind_tbl_num; ++}; ++ ++struct ib_uverbs_ex_destroy_rwq_ind_table { ++ __u32 comp_mask; ++ __u32 ind_tbl_handle; ++}; ++ ++struct ib_uverbs_cq_moderation { ++ __u16 cq_count; ++ __u16 cq_period; ++}; ++ ++struct ib_uverbs_ex_modify_cq { ++ __u32 cq_handle; ++ __u32 attr_mask; ++ struct ib_uverbs_cq_moderation attr; ++ __u32 reserved; ++}; ++ ++#define IB_DEVICE_NAME_MAX 64 ++ ++#endif /* IB_USER_VERBS_H */ +diff --git a/rdma/include/uapi/rdma/rdma_netlink.h b/rdma/include/uapi/rdma/rdma_netlink.h +new file mode 100644 +index 
0000000000000..04c80cebef49f +--- /dev/null ++++ b/rdma/include/uapi/rdma/rdma_netlink.h +@@ -0,0 +1,438 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef _RDMA_NETLINK_H ++#define _RDMA_NETLINK_H ++ ++#include ++ ++enum { ++ RDMA_NL_RDMA_CM = 1, ++ RDMA_NL_IWCM, ++ RDMA_NL_RSVD, ++ RDMA_NL_LS, /* RDMA Local Services */ ++ RDMA_NL_NLDEV, /* RDMA device interface */ ++ RDMA_NL_NUM_CLIENTS ++}; ++ ++enum { ++ RDMA_NL_GROUP_CM = 1, ++ RDMA_NL_GROUP_IWPM, ++ RDMA_NL_GROUP_LS, ++ RDMA_NL_NUM_GROUPS ++}; ++ ++#define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10) ++#define RDMA_NL_GET_OP(type) (type & ((1 << 10) - 1)) ++#define RDMA_NL_GET_TYPE(client, op) ((client << 10) + op) ++ ++enum { ++ RDMA_NL_RDMA_CM_ID_STATS = 0, ++ RDMA_NL_RDMA_CM_NUM_OPS ++}; ++ ++enum { ++ RDMA_NL_RDMA_CM_ATTR_SRC_ADDR = 1, ++ RDMA_NL_RDMA_CM_ATTR_DST_ADDR, ++ RDMA_NL_RDMA_CM_NUM_ATTR, ++}; ++ ++/* iwarp port mapper op-codes */ ++enum { ++ RDMA_NL_IWPM_REG_PID = 0, ++ RDMA_NL_IWPM_ADD_MAPPING, ++ RDMA_NL_IWPM_QUERY_MAPPING, ++ RDMA_NL_IWPM_REMOVE_MAPPING, ++ RDMA_NL_IWPM_REMOTE_INFO, ++ RDMA_NL_IWPM_HANDLE_ERR, ++ RDMA_NL_IWPM_MAPINFO, ++ RDMA_NL_IWPM_MAPINFO_NUM, ++ RDMA_NL_IWPM_NUM_OPS ++}; ++ ++struct rdma_cm_id_stats { ++ __u32 qp_num; ++ __u32 bound_dev_if; ++ __u32 port_space; ++ __s32 pid; ++ __u8 cm_state; ++ __u8 node_type; ++ __u8 port_num; ++ __u8 qp_type; ++}; ++ ++enum { ++ IWPM_NLA_REG_PID_UNSPEC = 0, ++ IWPM_NLA_REG_PID_SEQ, ++ IWPM_NLA_REG_IF_NAME, ++ IWPM_NLA_REG_IBDEV_NAME, ++ IWPM_NLA_REG_ULIB_NAME, ++ IWPM_NLA_REG_PID_MAX ++}; ++ ++enum { ++ IWPM_NLA_RREG_PID_UNSPEC = 0, ++ IWPM_NLA_RREG_PID_SEQ, ++ IWPM_NLA_RREG_IBDEV_NAME, ++ IWPM_NLA_RREG_ULIB_NAME, ++ IWPM_NLA_RREG_ULIB_VER, ++ IWPM_NLA_RREG_PID_ERR, ++ IWPM_NLA_RREG_PID_MAX ++ ++}; ++ ++enum { ++ IWPM_NLA_MANAGE_MAPPING_UNSPEC = 0, ++ IWPM_NLA_MANAGE_MAPPING_SEQ, ++ IWPM_NLA_MANAGE_ADDR, ++ IWPM_NLA_MANAGE_MAPPED_LOC_ADDR, ++ IWPM_NLA_RMANAGE_MAPPING_ERR, ++ 
IWPM_NLA_RMANAGE_MAPPING_MAX ++}; ++ ++#define IWPM_NLA_MANAGE_MAPPING_MAX 3 ++#define IWPM_NLA_QUERY_MAPPING_MAX 4 ++#define IWPM_NLA_MAPINFO_SEND_MAX 3 ++ ++enum { ++ IWPM_NLA_QUERY_MAPPING_UNSPEC = 0, ++ IWPM_NLA_QUERY_MAPPING_SEQ, ++ IWPM_NLA_QUERY_LOCAL_ADDR, ++ IWPM_NLA_QUERY_REMOTE_ADDR, ++ IWPM_NLA_RQUERY_MAPPED_LOC_ADDR, ++ IWPM_NLA_RQUERY_MAPPED_REM_ADDR, ++ IWPM_NLA_RQUERY_MAPPING_ERR, ++ IWPM_NLA_RQUERY_MAPPING_MAX ++}; ++ ++enum { ++ IWPM_NLA_MAPINFO_REQ_UNSPEC = 0, ++ IWPM_NLA_MAPINFO_ULIB_NAME, ++ IWPM_NLA_MAPINFO_ULIB_VER, ++ IWPM_NLA_MAPINFO_REQ_MAX ++}; ++ ++enum { ++ IWPM_NLA_MAPINFO_UNSPEC = 0, ++ IWPM_NLA_MAPINFO_LOCAL_ADDR, ++ IWPM_NLA_MAPINFO_MAPPED_ADDR, ++ IWPM_NLA_MAPINFO_MAX ++}; ++ ++enum { ++ IWPM_NLA_MAPINFO_NUM_UNSPEC = 0, ++ IWPM_NLA_MAPINFO_SEQ, ++ IWPM_NLA_MAPINFO_SEND_NUM, ++ IWPM_NLA_MAPINFO_ACK_NUM, ++ IWPM_NLA_MAPINFO_NUM_MAX ++}; ++ ++enum { ++ IWPM_NLA_ERR_UNSPEC = 0, ++ IWPM_NLA_ERR_SEQ, ++ IWPM_NLA_ERR_CODE, ++ IWPM_NLA_ERR_MAX ++}; ++ ++/* ++ * Local service operations: ++ * RESOLVE - The client requests the local service to resolve a path. ++ * SET_TIMEOUT - The local service requests the client to set the timeout. ++ * IP_RESOLVE - The client requests the local service to resolve an IP to GID. ++ */ ++enum { ++ RDMA_NL_LS_OP_RESOLVE = 0, ++ RDMA_NL_LS_OP_SET_TIMEOUT, ++ RDMA_NL_LS_OP_IP_RESOLVE, ++ RDMA_NL_LS_NUM_OPS ++}; ++ ++/* Local service netlink message flags */ ++#define RDMA_NL_LS_F_ERR 0x0100 /* Failed response */ ++ ++/* ++ * Local service resolve operation family header. ++ * The layout for the resolve operation: ++ * nlmsg header ++ * family header ++ * attributes ++ */ ++ ++/* ++ * Local service path use: ++ * Specify how the path(s) will be used. 
++ * ALL - For connected CM operation (6 pathrecords) ++ * UNIDIRECTIONAL - For unidirectional UD (1 pathrecord) ++ * GMP - For miscellaneous GMP like operation (at least 1 reversible ++ * pathrecord) ++ */ ++enum { ++ LS_RESOLVE_PATH_USE_ALL = 0, ++ LS_RESOLVE_PATH_USE_UNIDIRECTIONAL, ++ LS_RESOLVE_PATH_USE_GMP, ++ LS_RESOLVE_PATH_USE_MAX ++}; ++ ++#define LS_DEVICE_NAME_MAX 64 ++ ++struct rdma_ls_resolve_header { ++ __u8 device_name[LS_DEVICE_NAME_MAX]; ++ __u8 port_num; ++ __u8 path_use; ++}; ++ ++struct rdma_ls_ip_resolve_header { ++ __u32 ifindex; ++}; ++ ++/* Local service attribute type */ ++#define RDMA_NLA_F_MANDATORY (1 << 13) ++#define RDMA_NLA_TYPE_MASK (~(NLA_F_NESTED | NLA_F_NET_BYTEORDER | \ ++ RDMA_NLA_F_MANDATORY)) ++ ++/* ++ * Local service attributes: ++ * Attr Name Size Byte order ++ * ----------------------------------------------------- ++ * PATH_RECORD struct ib_path_rec_data ++ * TIMEOUT u32 cpu ++ * SERVICE_ID u64 cpu ++ * DGID u8[16] BE ++ * SGID u8[16] BE ++ * TCLASS u8 ++ * PKEY u16 cpu ++ * QOS_CLASS u16 cpu ++ * IPV4 u32 BE ++ * IPV6 u8[16] BE ++ */ ++enum { ++ LS_NLA_TYPE_UNSPEC = 0, ++ LS_NLA_TYPE_PATH_RECORD, ++ LS_NLA_TYPE_TIMEOUT, ++ LS_NLA_TYPE_SERVICE_ID, ++ LS_NLA_TYPE_DGID, ++ LS_NLA_TYPE_SGID, ++ LS_NLA_TYPE_TCLASS, ++ LS_NLA_TYPE_PKEY, ++ LS_NLA_TYPE_QOS_CLASS, ++ LS_NLA_TYPE_IPV4, ++ LS_NLA_TYPE_IPV6, ++ LS_NLA_TYPE_MAX ++}; ++ ++/* Local service DGID/SGID attribute: big endian */ ++struct rdma_nla_ls_gid { ++ __u8 gid[16]; ++}; ++ ++enum rdma_nldev_command { ++ RDMA_NLDEV_CMD_UNSPEC, ++ ++ RDMA_NLDEV_CMD_GET, /* can dump */ ++ RDMA_NLDEV_CMD_SET, ++ ++ /* 3 - 4 are free to use */ ++ ++ RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */ ++ ++ /* 6 - 8 are free to use */ ++ ++ RDMA_NLDEV_CMD_RES_GET = 9, /* can dump */ ++ ++ RDMA_NLDEV_CMD_RES_QP_GET, /* can dump */ ++ ++ RDMA_NLDEV_CMD_RES_CM_ID_GET, /* can dump */ ++ ++ RDMA_NLDEV_CMD_RES_CQ_GET, /* can dump */ ++ ++ RDMA_NLDEV_CMD_RES_MR_GET, /* can dump */ ++ ++ 
RDMA_NLDEV_CMD_RES_PD_GET, /* can dump */ ++ ++ RDMA_NLDEV_NUM_OPS ++}; ++ ++enum { ++ RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16, ++}; ++ ++enum rdma_nldev_print_type { ++ RDMA_NLDEV_PRINT_TYPE_UNSPEC, ++ RDMA_NLDEV_PRINT_TYPE_HEX, ++}; ++ ++enum rdma_nldev_attr { ++ /* don't change the order or add anything between, this is ABI! */ ++ RDMA_NLDEV_ATTR_UNSPEC, ++ ++ /* Pad attribute for 64b alignment */ ++ RDMA_NLDEV_ATTR_PAD = RDMA_NLDEV_ATTR_UNSPEC, ++ ++ /* Identifier for ib_device */ ++ RDMA_NLDEV_ATTR_DEV_INDEX, /* u32 */ ++ ++ RDMA_NLDEV_ATTR_DEV_NAME, /* string */ ++ /* ++ * Device index together with port index are identifiers ++ * for port/link properties. ++ * ++ * For RDMA_NLDEV_CMD_GET command, port index will return number ++ * of available ports in ib_device, while for port specific operations, ++ * it will be real port index as it appears in sysfs. Port index follows ++ * sysfs notation and starts from 1 for the first port. ++ */ ++ RDMA_NLDEV_ATTR_PORT_INDEX, /* u32 */ ++ ++ /* ++ * Device and port capabilities ++ * ++ * When used for port info, first 32-bits are CapabilityMask followed by ++ * 16-bit CapabilityMask2. ++ */ ++ RDMA_NLDEV_ATTR_CAP_FLAGS, /* u64 */ ++ ++ /* ++ * FW version ++ */ ++ RDMA_NLDEV_ATTR_FW_VERSION, /* string */ ++ ++ /* ++ * Node GUID (in host byte order) associated with the RDMA device. ++ */ ++ RDMA_NLDEV_ATTR_NODE_GUID, /* u64 */ ++ ++ /* ++ * System image GUID (in host byte order) associated with ++ * this RDMA device and other devices which are part of a ++ * single system. ++ */ ++ RDMA_NLDEV_ATTR_SYS_IMAGE_GUID, /* u64 */ ++ ++ /* ++ * Subnet prefix (in host byte order) ++ */ ++ RDMA_NLDEV_ATTR_SUBNET_PREFIX, /* u64 */ ++ ++ /* ++ * Local Identifier (LID), ++ * According to IB specification, It is 16-bit address assigned ++ * by the Subnet Manager. Extended to be 32-bit for OmniPath users. 
++ */ ++ RDMA_NLDEV_ATTR_LID, /* u32 */ ++ RDMA_NLDEV_ATTR_SM_LID, /* u32 */ ++ ++ /* ++ * LID mask control (LMC) ++ */ ++ RDMA_NLDEV_ATTR_LMC, /* u8 */ ++ ++ RDMA_NLDEV_ATTR_PORT_STATE, /* u8 */ ++ RDMA_NLDEV_ATTR_PORT_PHYS_STATE, /* u8 */ ++ ++ RDMA_NLDEV_ATTR_DEV_NODE_TYPE, /* u8 */ ++ ++ RDMA_NLDEV_ATTR_RES_SUMMARY, /* nested table */ ++ RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY, /* nested table */ ++ RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, /* string */ ++ RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, /* u64 */ ++ ++ RDMA_NLDEV_ATTR_RES_QP, /* nested table */ ++ RDMA_NLDEV_ATTR_RES_QP_ENTRY, /* nested table */ ++ /* ++ * Local QPN ++ */ ++ RDMA_NLDEV_ATTR_RES_LQPN, /* u32 */ ++ /* ++ * Remote QPN, ++ * Applicable for RC and UC only IBTA 11.2.5.3 QUERY QUEUE PAIR ++ */ ++ RDMA_NLDEV_ATTR_RES_RQPN, /* u32 */ ++ /* ++ * Receive Queue PSN, ++ * Applicable for RC and UC only 11.2.5.3 QUERY QUEUE PAIR ++ */ ++ RDMA_NLDEV_ATTR_RES_RQ_PSN, /* u32 */ ++ /* ++ * Send Queue PSN ++ */ ++ RDMA_NLDEV_ATTR_RES_SQ_PSN, /* u32 */ ++ RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE, /* u8 */ ++ /* ++ * QP types as visible to RDMA/core, the reserved QPT ++ * are not exported through this interface. ++ */ ++ RDMA_NLDEV_ATTR_RES_TYPE, /* u8 */ ++ RDMA_NLDEV_ATTR_RES_STATE, /* u8 */ ++ /* ++ * Process ID which created object, ++ * in case of kernel origin, PID won't exist. ++ */ ++ RDMA_NLDEV_ATTR_RES_PID, /* u32 */ ++ /* ++ * The name of process created following resource. ++ * It will exist only for kernel objects. ++ * For user created objects, the user is supposed ++ * to read /proc/PID/comm file. ++ */ ++ RDMA_NLDEV_ATTR_RES_KERN_NAME, /* string */ ++ ++ RDMA_NLDEV_ATTR_RES_CM_ID, /* nested table */ ++ RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY, /* nested table */ ++ /* ++ * rdma_cm_id port space. 
++ */ ++ RDMA_NLDEV_ATTR_RES_PS, /* u32 */ ++ /* ++ * Source and destination socket addresses ++ */ ++ RDMA_NLDEV_ATTR_RES_SRC_ADDR, /* __kernel_sockaddr_storage */ ++ RDMA_NLDEV_ATTR_RES_DST_ADDR, /* __kernel_sockaddr_storage */ ++ ++ RDMA_NLDEV_ATTR_RES_CQ, /* nested table */ ++ RDMA_NLDEV_ATTR_RES_CQ_ENTRY, /* nested table */ ++ RDMA_NLDEV_ATTR_RES_CQE, /* u32 */ ++ RDMA_NLDEV_ATTR_RES_USECNT, /* u64 */ ++ RDMA_NLDEV_ATTR_RES_POLL_CTX, /* u8 */ ++ ++ RDMA_NLDEV_ATTR_RES_MR, /* nested table */ ++ RDMA_NLDEV_ATTR_RES_MR_ENTRY, /* nested table */ ++ RDMA_NLDEV_ATTR_RES_RKEY, /* u32 */ ++ RDMA_NLDEV_ATTR_RES_LKEY, /* u32 */ ++ RDMA_NLDEV_ATTR_RES_IOVA, /* u64 */ ++ RDMA_NLDEV_ATTR_RES_MRLEN, /* u64 */ ++ ++ RDMA_NLDEV_ATTR_RES_PD, /* nested table */ ++ RDMA_NLDEV_ATTR_RES_PD_ENTRY, /* nested table */ ++ RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY, /* u32 */ ++ RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY, /* u32 */ ++ /* ++ * Provides logical name and index of netdevice which is ++ * connected to physical port. This information is relevant ++ * for RoCE and iWARP. ++ * ++ * The netdevices which are associated with containers are ++ * supposed to be exported together with GID table once it ++ * will be exposed through the netlink. Because the ++ * associated netdevices are properties of GIDs. ++ */ ++ RDMA_NLDEV_ATTR_NDEV_INDEX, /* u32 */ ++ RDMA_NLDEV_ATTR_NDEV_NAME, /* string */ ++ /* ++ * driver-specific attributes. 
++ */ ++ RDMA_NLDEV_ATTR_DRIVER, /* nested table */ ++ RDMA_NLDEV_ATTR_DRIVER_ENTRY, /* nested table */ ++ RDMA_NLDEV_ATTR_DRIVER_STRING, /* string */ ++ /* ++ * u8 values from enum rdma_nldev_print_type ++ */ ++ RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, /* u8 */ ++ RDMA_NLDEV_ATTR_DRIVER_S32, /* s32 */ ++ RDMA_NLDEV_ATTR_DRIVER_U32, /* u32 */ ++ RDMA_NLDEV_ATTR_DRIVER_S64, /* s64 */ ++ RDMA_NLDEV_ATTR_DRIVER_U64, /* u64 */ ++ ++ /* ++ * Always the end ++ */ ++ RDMA_NLDEV_ATTR_MAX ++}; ++#endif /* _RDMA_NETLINK_H */ +diff --git a/rdma/include/uapi/rdma/rdma_user_cm.h b/rdma/include/uapi/rdma/rdma_user_cm.h +new file mode 100644 +index 0000000000000..0d1e78ebad051 +--- /dev/null ++++ b/rdma/include/uapi/rdma/rdma_user_cm.h +@@ -0,0 +1,324 @@ ++/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ ++/* ++ * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#ifndef RDMA_USER_CM_H ++#define RDMA_USER_CM_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define RDMA_USER_CM_ABI_VERSION 4 ++ ++#define RDMA_MAX_PRIVATE_DATA 256 ++ ++enum { ++ RDMA_USER_CM_CMD_CREATE_ID, ++ RDMA_USER_CM_CMD_DESTROY_ID, ++ RDMA_USER_CM_CMD_BIND_IP, ++ RDMA_USER_CM_CMD_RESOLVE_IP, ++ RDMA_USER_CM_CMD_RESOLVE_ROUTE, ++ RDMA_USER_CM_CMD_QUERY_ROUTE, ++ RDMA_USER_CM_CMD_CONNECT, ++ RDMA_USER_CM_CMD_LISTEN, ++ RDMA_USER_CM_CMD_ACCEPT, ++ RDMA_USER_CM_CMD_REJECT, ++ RDMA_USER_CM_CMD_DISCONNECT, ++ RDMA_USER_CM_CMD_INIT_QP_ATTR, ++ RDMA_USER_CM_CMD_GET_EVENT, ++ RDMA_USER_CM_CMD_GET_OPTION, ++ RDMA_USER_CM_CMD_SET_OPTION, ++ RDMA_USER_CM_CMD_NOTIFY, ++ RDMA_USER_CM_CMD_JOIN_IP_MCAST, ++ RDMA_USER_CM_CMD_LEAVE_MCAST, ++ RDMA_USER_CM_CMD_MIGRATE_ID, ++ RDMA_USER_CM_CMD_QUERY, ++ RDMA_USER_CM_CMD_BIND, ++ RDMA_USER_CM_CMD_RESOLVE_ADDR, ++ RDMA_USER_CM_CMD_JOIN_MCAST ++}; ++ ++/* See IBTA Annex A11, service ID bytes 4 & 5 */ ++enum rdma_ucm_port_space { ++ RDMA_PS_IPOIB = 0x0002, ++ RDMA_PS_IB = 0x013F, ++ RDMA_PS_TCP = 0x0106, ++ RDMA_PS_UDP = 0x0111, ++}; ++ ++/* ++ * command ABI structures. 
++ */ ++struct rdma_ucm_cmd_hdr { ++ __u32 cmd; ++ __u16 in; ++ __u16 out; ++}; ++ ++struct rdma_ucm_create_id { ++ __aligned_u64 uid; ++ __aligned_u64 response; ++ __u16 ps; /* use enum rdma_ucm_port_space */ ++ __u8 qp_type; ++ __u8 reserved[5]; ++}; ++ ++struct rdma_ucm_create_id_resp { ++ __u32 id; ++}; ++ ++struct rdma_ucm_destroy_id { ++ __aligned_u64 response; ++ __u32 id; ++ __u32 reserved; ++}; ++ ++struct rdma_ucm_destroy_id_resp { ++ __u32 events_reported; ++}; ++ ++struct rdma_ucm_bind_ip { ++ __aligned_u64 response; ++ struct sockaddr_in6 addr; ++ __u32 id; ++}; ++ ++struct rdma_ucm_bind { ++ __u32 id; ++ __u16 addr_size; ++ __u16 reserved; ++ struct __kernel_sockaddr_storage addr; ++}; ++ ++struct rdma_ucm_resolve_ip { ++ struct sockaddr_in6 src_addr; ++ struct sockaddr_in6 dst_addr; ++ __u32 id; ++ __u32 timeout_ms; ++}; ++ ++struct rdma_ucm_resolve_addr { ++ __u32 id; ++ __u32 timeout_ms; ++ __u16 src_size; ++ __u16 dst_size; ++ __u32 reserved; ++ struct __kernel_sockaddr_storage src_addr; ++ struct __kernel_sockaddr_storage dst_addr; ++}; ++ ++struct rdma_ucm_resolve_route { ++ __u32 id; ++ __u32 timeout_ms; ++}; ++ ++enum { ++ RDMA_USER_CM_QUERY_ADDR, ++ RDMA_USER_CM_QUERY_PATH, ++ RDMA_USER_CM_QUERY_GID ++}; ++ ++struct rdma_ucm_query { ++ __aligned_u64 response; ++ __u32 id; ++ __u32 option; ++}; ++ ++struct rdma_ucm_query_route_resp { ++ __aligned_u64 node_guid; ++ struct ib_user_path_rec ib_route[2]; ++ struct sockaddr_in6 src_addr; ++ struct sockaddr_in6 dst_addr; ++ __u32 num_paths; ++ __u8 port_num; ++ __u8 reserved[3]; ++}; ++ ++struct rdma_ucm_query_addr_resp { ++ __aligned_u64 node_guid; ++ __u8 port_num; ++ __u8 reserved; ++ __u16 pkey; ++ __u16 src_size; ++ __u16 dst_size; ++ struct __kernel_sockaddr_storage src_addr; ++ struct __kernel_sockaddr_storage dst_addr; ++}; ++ ++struct rdma_ucm_query_path_resp { ++ __u32 num_paths; ++ __u32 reserved; ++ struct ib_path_rec_data path_data[0]; ++}; ++ ++struct rdma_ucm_conn_param { ++ __u32 
qp_num; ++ __u32 qkey; ++ __u8 private_data[RDMA_MAX_PRIVATE_DATA]; ++ __u8 private_data_len; ++ __u8 srq; ++ __u8 responder_resources; ++ __u8 initiator_depth; ++ __u8 flow_control; ++ __u8 retry_count; ++ __u8 rnr_retry_count; ++ __u8 valid; ++}; ++ ++struct rdma_ucm_ud_param { ++ __u32 qp_num; ++ __u32 qkey; ++ struct ib_uverbs_ah_attr ah_attr; ++ __u8 private_data[RDMA_MAX_PRIVATE_DATA]; ++ __u8 private_data_len; ++ __u8 reserved[7]; ++}; ++ ++struct rdma_ucm_connect { ++ struct rdma_ucm_conn_param conn_param; ++ __u32 id; ++ __u32 reserved; ++}; ++ ++struct rdma_ucm_listen { ++ __u32 id; ++ __u32 backlog; ++}; ++ ++struct rdma_ucm_accept { ++ __aligned_u64 uid; ++ struct rdma_ucm_conn_param conn_param; ++ __u32 id; ++ __u32 reserved; ++}; ++ ++struct rdma_ucm_reject { ++ __u32 id; ++ __u8 private_data_len; ++ __u8 reserved[3]; ++ __u8 private_data[RDMA_MAX_PRIVATE_DATA]; ++}; ++ ++struct rdma_ucm_disconnect { ++ __u32 id; ++}; ++ ++struct rdma_ucm_init_qp_attr { ++ __aligned_u64 response; ++ __u32 id; ++ __u32 qp_state; ++}; ++ ++struct rdma_ucm_notify { ++ __u32 id; ++ __u32 event; ++}; ++ ++struct rdma_ucm_join_ip_mcast { ++ __aligned_u64 response; /* rdma_ucm_create_id_resp */ ++ __aligned_u64 uid; ++ struct sockaddr_in6 addr; ++ __u32 id; ++}; ++ ++/* Multicast join flags */ ++enum { ++ RDMA_MC_JOIN_FLAG_FULLMEMBER, ++ RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER, ++ RDMA_MC_JOIN_FLAG_RESERVED, ++}; ++ ++struct rdma_ucm_join_mcast { ++ __aligned_u64 response; /* rdma_ucm_create_id_resp */ ++ __aligned_u64 uid; ++ __u32 id; ++ __u16 addr_size; ++ __u16 join_flags; ++ struct __kernel_sockaddr_storage addr; ++}; ++ ++struct rdma_ucm_get_event { ++ __aligned_u64 response; ++}; ++ ++struct rdma_ucm_event_resp { ++ __aligned_u64 uid; ++ __u32 id; ++ __u32 event; ++ __u32 status; ++ /* ++ * NOTE: This union is not aligned to 8 bytes so none of the union ++ * members may contain a u64 or anything with higher alignment than 4. 
++ */ ++ union { ++ struct rdma_ucm_conn_param conn; ++ struct rdma_ucm_ud_param ud; ++ } param; ++ __u32 reserved; ++}; ++ ++/* Option levels */ ++enum { ++ RDMA_OPTION_ID = 0, ++ RDMA_OPTION_IB = 1 ++}; ++ ++/* Option details */ ++enum { ++ RDMA_OPTION_ID_TOS = 0, ++ RDMA_OPTION_ID_REUSEADDR = 1, ++ RDMA_OPTION_ID_AFONLY = 2, ++ RDMA_OPTION_IB_PATH = 1 ++}; ++ ++struct rdma_ucm_set_option { ++ __aligned_u64 optval; ++ __u32 id; ++ __u32 level; ++ __u32 optname; ++ __u32 optlen; ++}; ++ ++struct rdma_ucm_migrate_id { ++ __aligned_u64 response; ++ __u32 id; ++ __u32 fd; ++}; ++ ++struct rdma_ucm_migrate_resp { ++ __u32 events_reported; ++}; ++ ++#endif /* RDMA_USER_CM_H */ +diff --git a/rdma/link.c b/rdma/link.c +new file mode 100644 +index 0000000000000..c064be627be2c +--- /dev/null ++++ b/rdma/link.c +@@ -0,0 +1,355 @@ ++/* ++ * link.c RDMA tool ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. 
++ * ++ * Authors: Leon Romanovsky ++ */ ++ ++#include "rdma.h" ++ ++static int link_help(struct rd *rd) ++{ ++ pr_out("Usage: %s link show [DEV/PORT_INDEX]\n", rd->filename); ++ return 0; ++} ++ ++static const char *caps_to_str(uint32_t idx) ++{ ++#define RDMA_PORT_FLAGS_LOW(x) \ ++ x(RESERVED, 0) \ ++ x(SM, 1) \ ++ x(NOTICE, 2) \ ++ x(TRAP, 3) \ ++ x(OPT_IPD, 4) \ ++ x(AUTO_MIGR, 5) \ ++ x(SL_MAP, 6) \ ++ x(MKEY_NVRAM, 7) \ ++ x(PKEY_NVRAM, 8) \ ++ x(LED_INFO, 9) \ ++ x(SM_DISABLED, 10) \ ++ x(SYS_IMAGE_GUID, 11) \ ++ x(PKEY_SW_EXT_PORT_TRAP, 12) \ ++ x(CABLE_INFO, 13) \ ++ x(EXTENDED_SPEEDS, 14) \ ++ x(CAP_MASK2, 15) \ ++ x(CM, 16) \ ++ x(SNMP_TUNNEL, 17) \ ++ x(REINIT, 18) \ ++ x(DEVICE_MGMT, 19) \ ++ x(VENDOR_CLASS, 20) \ ++ x(DR_NOTICE, 21) \ ++ x(CAP_MASK_NOTICE, 22) \ ++ x(BOOT_MGMT, 23) \ ++ x(LINK_LATENCY, 24) \ ++ x(CLIENT_REG, 25) \ ++ x(OTHER_LOCAL_CHANGES, 26) \ ++ x(LINK_SPPED_WIDTH, 27) \ ++ x(VENDOR_SPECIFIC_MADS, 28) \ ++ x(MULT_PKER_TRAP, 29) \ ++ x(MULT_FDB, 30) \ ++ x(HIERARCHY_INFO, 31) ++ ++#define RDMA_PORT_FLAGS_HIGH(x) \ ++ x(SET_NODE_DESC, 0) \ ++ x(EXT_INFO, 1) \ ++ x(VIRT, 2) \ ++ x(SWITCH_POR_STATE_TABLE, 3) \ ++ x(LINK_WIDTH_2X, 4) \ ++ x(LINK_SPEED_HDR, 5) ++ ++ /* ++ * Separation below is needed to allow compilation of rdmatool ++ * on 32bits systems. On such systems, C-enum is limited to be ++ * int and can't hold more than 32 bits. 
++ */ ++ enum { RDMA_PORT_FLAGS_LOW(RDMA_BITMAP_ENUM) }; ++ enum { RDMA_PORT_FLAGS_HIGH(RDMA_BITMAP_ENUM) }; ++ ++ static const char * const ++ rdma_port_names_low[] = { RDMA_PORT_FLAGS_LOW(RDMA_BITMAP_NAMES) }; ++ static const char * const ++ rdma_port_names_high[] = { RDMA_PORT_FLAGS_HIGH(RDMA_BITMAP_NAMES) }; ++ uint32_t high_idx; ++ #undef RDMA_PORT_FLAGS_LOW ++ #undef RDMA_PORT_FLAGS_HIGH ++ ++ if (idx < ARRAY_SIZE(rdma_port_names_low) && rdma_port_names_low[idx]) ++ return rdma_port_names_low[idx]; ++ ++ high_idx = idx - ARRAY_SIZE(rdma_port_names_low); ++ if (high_idx < ARRAY_SIZE(rdma_port_names_high) && ++ rdma_port_names_high[high_idx]) ++ return rdma_port_names_high[high_idx]; ++ ++ return "UNKNOWN"; ++} ++ ++static void link_print_caps(struct rd *rd, struct nlattr **tb) ++{ ++ uint64_t caps; ++ uint32_t idx; ++ ++ if (!tb[RDMA_NLDEV_ATTR_CAP_FLAGS]) ++ return; ++ ++ caps = mnl_attr_get_u64(tb[RDMA_NLDEV_ATTR_CAP_FLAGS]); ++ ++ if (rd->json_output) { ++ jsonw_name(rd->jw, "caps"); ++ jsonw_start_array(rd->jw); ++ } else { ++ pr_out("\n caps: <"); ++ } ++ for (idx = 0; caps; idx++) { ++ if (caps & 0x1) { ++ if (rd->json_output) { ++ jsonw_string(rd->jw, caps_to_str(idx)); ++ } else { ++ pr_out("%s", caps_to_str(idx)); ++ if (caps >> 0x1) ++ pr_out(", "); ++ } ++ } ++ caps >>= 0x1; ++ } ++ ++ if (rd->json_output) ++ jsonw_end_array(rd->jw); ++ else ++ pr_out(">"); ++} ++ ++static void link_print_subnet_prefix(struct rd *rd, struct nlattr **tb) ++{ ++ uint64_t subnet_prefix; ++ uint16_t vp[4]; ++ char str[32]; ++ ++ if (!tb[RDMA_NLDEV_ATTR_SUBNET_PREFIX]) ++ return; ++ ++ subnet_prefix = mnl_attr_get_u64(tb[RDMA_NLDEV_ATTR_SUBNET_PREFIX]); ++ memcpy(vp, &subnet_prefix, sizeof(uint64_t)); ++ snprintf(str, 32, "%04x:%04x:%04x:%04x", vp[3], vp[2], vp[1], vp[0]); ++ if (rd->json_output) ++ jsonw_string_field(rd->jw, "subnet_prefix", str); ++ else ++ pr_out("subnet_prefix %s ", str); ++} ++ ++static void link_print_lid(struct rd *rd, struct nlattr **tb) ++{ ++ 
uint32_t lid; ++ ++ if (!tb[RDMA_NLDEV_ATTR_LID]) ++ return; ++ ++ lid = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_LID]); ++ if (rd->json_output) ++ jsonw_uint_field(rd->jw, "lid", lid); ++ else ++ pr_out("lid %u ", lid); ++} ++ ++static void link_print_sm_lid(struct rd *rd, struct nlattr **tb) ++{ ++ uint32_t sm_lid; ++ ++ if (!tb[RDMA_NLDEV_ATTR_SM_LID]) ++ return; ++ ++ sm_lid = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_SM_LID]); ++ if (rd->json_output) ++ jsonw_uint_field(rd->jw, "sm_lid", sm_lid); ++ else ++ pr_out("sm_lid %u ", sm_lid); ++} ++ ++static void link_print_lmc(struct rd *rd, struct nlattr **tb) ++{ ++ uint8_t lmc; ++ ++ if (!tb[RDMA_NLDEV_ATTR_LMC]) ++ return; ++ ++ lmc = mnl_attr_get_u8(tb[RDMA_NLDEV_ATTR_LMC]); ++ if (rd->json_output) ++ jsonw_uint_field(rd->jw, "lmc", lmc); ++ else ++ pr_out("lmc %u ", lmc); ++} ++ ++static const char *link_state_to_str(uint8_t link_state) ++{ ++ static const char * const link_state_str[] = { "NOP", "DOWN", ++ "INIT", "ARMED", ++ "ACTIVE", ++ "ACTIVE_DEFER" }; ++ if (link_state < ARRAY_SIZE(link_state_str)) ++ return link_state_str[link_state]; ++ return "UNKNOWN"; ++} ++ ++static void link_print_state(struct rd *rd, struct nlattr **tb) ++{ ++ uint8_t state; ++ ++ if (!tb[RDMA_NLDEV_ATTR_PORT_STATE]) ++ return; ++ ++ state = mnl_attr_get_u8(tb[RDMA_NLDEV_ATTR_PORT_STATE]); ++ if (rd->json_output) ++ jsonw_string_field(rd->jw, "state", link_state_to_str(state)); ++ else ++ pr_out("state %s ", link_state_to_str(state)); ++} ++ ++static const char *phys_state_to_str(uint8_t phys_state) ++{ ++ static const char * const phys_state_str[] = { "NOP", "SLEEP", ++ "POLLING", "DISABLED", ++ "ARMED", "LINK_UP", ++ "LINK_ERROR_RECOVER", ++ "PHY_TEST", "UNKNOWN", ++ "OPA_OFFLINE", ++ "UNKNOWN", "OPA_TEST" }; ++ if (phys_state < ARRAY_SIZE(phys_state_str)) ++ return phys_state_str[phys_state]; ++ return "UNKNOWN"; ++}; ++ ++static void link_print_phys_state(struct rd *rd, struct nlattr **tb) ++{ ++ uint8_t phys_state; ++ ++ if 
(!tb[RDMA_NLDEV_ATTR_PORT_PHYS_STATE]) ++ return; ++ ++ phys_state = mnl_attr_get_u8(tb[RDMA_NLDEV_ATTR_PORT_PHYS_STATE]); ++ if (rd->json_output) ++ jsonw_string_field(rd->jw, "physical_state", ++ phys_state_to_str(phys_state)); ++ else ++ pr_out("physical_state %s ", phys_state_to_str(phys_state)); ++} ++ ++static void link_print_netdev(struct rd *rd, struct nlattr **tb) ++{ ++ const char *netdev_name; ++ uint32_t idx; ++ ++ if (!tb[RDMA_NLDEV_ATTR_NDEV_NAME] || !tb[RDMA_NLDEV_ATTR_NDEV_INDEX]) ++ return; ++ ++ netdev_name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_NDEV_NAME]); ++ idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_NDEV_INDEX]); ++ if (rd->json_output) { ++ jsonw_string_field(rd->jw, "netdev", netdev_name); ++ jsonw_uint_field(rd->jw, "netdev_index", idx); ++ } else { ++ pr_out("netdev %s ", netdev_name); ++ if (rd->show_details) ++ pr_out("netdev_index %u ", idx); ++ } ++} ++ ++static int link_parse_cb(const struct nlmsghdr *nlh, void *data) ++{ ++ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {}; ++ struct rd *rd = data; ++ uint32_t port, idx; ++ char name[32]; ++ ++ mnl_attr_parse(nlh, 0, rd_attr_cb, tb); ++ if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_DEV_NAME]) ++ return MNL_CB_ERROR; ++ ++ if (!tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { ++ pr_err("This tool doesn't support switches yet\n"); ++ return MNL_CB_ERROR; ++ } ++ ++ idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); ++ port = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); ++ snprintf(name, 32, "%s/%u", ++ mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]), port); ++ ++ if (rd->json_output) { ++ jsonw_uint_field(rd->jw, "ifindex", idx); ++ jsonw_uint_field(rd->jw, "port", port); ++ jsonw_string_field(rd->jw, "ifname", name); ++ ++ } else { ++ pr_out("%u/%u: %s: ", idx, port, name); ++ } ++ ++ link_print_subnet_prefix(rd, tb); ++ link_print_lid(rd, tb); ++ link_print_sm_lid(rd, tb); ++ link_print_lmc(rd, tb); ++ link_print_state(rd, tb); ++ link_print_phys_state(rd, tb); ++ link_print_netdev(rd, 
tb); ++ if (rd->show_details) ++ link_print_caps(rd, tb); ++ ++ if (!rd->json_output) ++ pr_out("\n"); ++ return MNL_CB_OK; ++} ++ ++static int link_no_args(struct rd *rd) ++{ ++ uint32_t seq; ++ int ret; ++ ++ rd_prepare_msg(rd, RDMA_NLDEV_CMD_PORT_GET, &seq, ++ (NLM_F_REQUEST | NLM_F_ACK)); ++ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx); ++ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_PORT_INDEX, rd->port_idx); ++ ret = rd_send_msg(rd); ++ if (ret) ++ return ret; ++ ++ if (rd->json_output) ++ jsonw_start_object(rd->jw); ++ ret = rd_recv_msg(rd, link_parse_cb, rd, seq); ++ if (rd->json_output) ++ jsonw_end_object(rd->jw); ++ return ret; ++} ++ ++static int link_one_show(struct rd *rd) ++{ ++ const struct rd_cmd cmds[] = { ++ { NULL, link_no_args}, ++ { 0 } ++ }; ++ ++ if (!rd->port_idx) ++ return 0; ++ ++ return rd_exec_cmd(rd, cmds, "parameter"); ++} ++ ++static int link_show(struct rd *rd) ++{ ++ return rd_exec_link(rd, link_one_show, true); ++} ++ ++int cmd_link(struct rd *rd) ++{ ++ const struct rd_cmd cmds[] = { ++ { NULL, link_show }, ++ { "show", link_show }, ++ { "list", link_show }, ++ { "help", link_help }, ++ { 0 } ++ }; ++ ++ return rd_exec_cmd(rd, cmds, "link command"); ++} +diff --git a/rdma/rdma.c b/rdma/rdma.c +new file mode 100644 +index 0000000000000..010e98371ef09 +--- /dev/null ++++ b/rdma/rdma.c +@@ -0,0 +1,203 @@ ++/* ++ * rdma.c RDMA tool ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. 
++ * ++ * Authors: Leon Romanovsky ++ */ ++ ++#include "rdma.h" ++#include "SNAPSHOT.h" ++ ++static void help(char *name) ++{ ++ pr_out("Usage: %s [ OPTIONS ] OBJECT { COMMAND | help }\n" ++ " %s [ -f[orce] ] -b[atch] filename\n" ++ "where OBJECT := { dev | link | resource | help }\n" ++ " OPTIONS := { -V[ersion] | -d[etails] | -j[son] | -p[retty]}\n", name, name); ++} ++ ++static int cmd_help(struct rd *rd) ++{ ++ help(rd->filename); ++ return 0; ++} ++ ++static int rd_cmd(struct rd *rd, int argc, char **argv) ++{ ++ const struct rd_cmd cmds[] = { ++ { NULL, cmd_help }, ++ { "help", cmd_help }, ++ { "dev", cmd_dev }, ++ { "link", cmd_link }, ++ { "resource", cmd_res }, ++ { 0 } ++ }; ++ ++ rd->argc = argc; ++ rd->argv = argv; ++ ++ return rd_exec_cmd(rd, cmds, "object"); ++} ++ ++static int rd_batch(struct rd *rd, const char *name, bool force) ++{ ++ char *line = NULL; ++ size_t len = 0; ++ int ret = 0; ++ ++ if (name && strcmp(name, "-") != 0) { ++ if (!freopen(name, "r", stdin)) { ++ pr_err("Cannot open file \"%s\" for reading: %s\n", ++ name, strerror(errno)); ++ return errno; ++ } ++ } ++ ++ cmdlineno = 0; ++ while (getcmdline(&line, &len, stdin) != -1) { ++ char *largv[512]; ++ int largc; ++ ++ largc = makeargs(line, largv, ARRAY_SIZE(largv)); ++ if (!largc) ++ continue; /* blank line */ ++ ++ ret = rd_cmd(rd, largc, largv); ++ if (ret) { ++ pr_err("Command failed %s:%d\n", name, cmdlineno); ++ if (!force) ++ break; ++ } ++ } ++ ++ free(line); ++ ++ return ret; ++} ++ ++static int rd_init(struct rd *rd, char *filename) ++{ ++ uint32_t seq; ++ int ret; ++ ++ rd->filename = filename; ++ INIT_LIST_HEAD(&rd->dev_map_list); ++ INIT_LIST_HEAD(&rd->filter_list); ++ ++ if (rd->json_output) { ++ rd->jw = jsonw_new(stdout); ++ if (!rd->jw) { ++ pr_err("Failed to create JSON writer\n"); ++ return -ENOMEM; ++ } ++ jsonw_pretty(rd->jw, rd->pretty_output); ++ } ++ ++ rd->buff = malloc(MNL_SOCKET_BUFFER_SIZE); ++ if (!rd->buff) ++ return -ENOMEM; ++ ++ rd_prepare_msg(rd, 
RDMA_NLDEV_CMD_GET, ++ &seq, (NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP)); ++ ret = rd_send_msg(rd); ++ if (ret) ++ return ret; ++ ++ return rd_recv_msg(rd, rd_dev_init_cb, rd, seq); ++} ++ ++static void rd_cleanup(struct rd *rd) ++{ ++ if (rd->json_output && rd->jw) /* jw is still NULL if rd_init() failed in jsonw_new() */ ++ jsonw_destroy(&rd->jw); ++ rd_free(rd); ++} ++ ++int main(int argc, char **argv) ++{ ++ static const struct option long_options[] = { ++ { "version", no_argument, NULL, 'V' }, ++ { "help", no_argument, NULL, 'h' }, ++ { "json", no_argument, NULL, 'j' }, ++ { "pretty", no_argument, NULL, 'p' }, ++ { "details", no_argument, NULL, 'd' }, ++ { "force", no_argument, NULL, 'f' }, ++ { "batch", required_argument, NULL, 'b' }, ++ { NULL, 0, NULL, 0 } ++ }; ++ bool show_driver_details = false; ++ const char *batch_file = NULL; ++ bool pretty_output = false; ++ bool show_details = false; ++ bool json_output = false; ++ bool force = false; ++ struct rd rd = {}; ++ char *filename; ++ int opt; ++ int err; ++ ++ filename = basename(argv[0]); ++ ++ while ((opt = getopt_long(argc, argv, ":Vhdpjfb:", ++ long_options, NULL)) >= 0) { ++ switch (opt) { ++ case 'V': ++ printf("%s utility, iproute2-ss%s\n", ++ filename, SNAPSHOT); ++ return EXIT_SUCCESS; ++ case 'p': ++ pretty_output = true; ++ break; ++ case 'd': ++ if (show_details) ++ show_driver_details = true; ++ else ++ show_details = true; ++ break; ++ case 'j': ++ json_output = true; ++ break; ++ case 'f': ++ force = true; ++ break; ++ case 'b': ++ batch_file = optarg; ++ break; ++ case 'h': ++ help(filename); ++ return EXIT_SUCCESS; ++ case ':': ++ pr_err("-%c option requires an argument\n", optopt); ++ return EXIT_FAILURE; ++ default: ++ pr_err("Unknown option.\n"); ++ help(filename); ++ return EXIT_FAILURE; ++ } ++ } ++ ++ argc -= optind; ++ argv += optind; ++ ++ rd.show_details = show_details; ++ rd.show_driver_details = show_driver_details; ++ rd.json_output = json_output; ++ rd.pretty_output = pretty_output; ++ ++ err = rd_init(&rd, filename); ++ if (err) ++ goto out;
++ ++ if (batch_file) ++ err = rd_batch(&rd, batch_file, force); ++ else ++ err = rd_cmd(&rd, argc, argv); ++out: ++ /* Always cleanup */ ++ rd_cleanup(&rd); ++ return err ? EXIT_FAILURE : EXIT_SUCCESS; ++} +diff --git a/rdma/rdma.h b/rdma/rdma.h +new file mode 100644 +index 0000000000000..547bb5749a39f +--- /dev/null ++++ b/rdma/rdma.h +@@ -0,0 +1,131 @@ ++/* ++ * rdma.h RDMA tool ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Authors: Leon Romanovsky ++ */ ++#ifndef _RDMA_TOOL_H_ ++#define _RDMA_TOOL_H_ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "list.h" ++#include "utils.h" ++#include "json_writer.h" ++ ++#define pr_err(args...) fprintf(stderr, ##args) ++#define pr_out(args...) fprintf(stdout, ##args) ++ ++#define RDMA_BITMAP_ENUM(name, bit_no) RDMA_BITMAP_##name = BIT(bit_no), ++#define RDMA_BITMAP_NAMES(name, bit_no) [bit_no] = #name, ++ ++#define MAX_NUMBER_OF_FILTERS 64 ++struct filters { ++ const char *name; ++ bool is_number; ++}; ++ ++struct filter_entry { ++ struct list_head list; ++ char *key; ++ char *value; ++}; ++ ++struct dev_map { ++ struct list_head list; ++ char *dev_name; ++ uint32_t num_ports; ++ uint32_t idx; ++}; ++ ++struct rd { ++ int argc; ++ char **argv; ++ char *filename; ++ bool show_details; ++ bool show_driver_details; ++ struct list_head dev_map_list; ++ uint32_t dev_idx; ++ uint32_t port_idx; ++ struct mnl_socket *nl; ++ struct nlmsghdr *nlh; ++ char *buff; ++ json_writer_t *jw; ++ bool json_output; ++ bool pretty_output; ++ struct list_head filter_list; ++}; ++ ++struct rd_cmd { ++ const char *cmd; ++ int (*func)(struct rd *rd); ++}; ++ ++/* ++ * Parser interface ++ */ ++bool rd_no_arg(struct rd *rd); ++void rd_arg_inc(struct
rd *rd); ++ ++char *rd_argv(struct rd *rd); ++ ++/* ++ * Commands interface ++ */ ++int cmd_dev(struct rd *rd); ++int cmd_link(struct rd *rd); ++int cmd_res(struct rd *rd); ++int rd_exec_cmd(struct rd *rd, const struct rd_cmd *c, const char *str); ++int rd_exec_dev(struct rd *rd, int (*cb)(struct rd *rd)); ++int rd_exec_require_dev(struct rd *rd, int (*cb)(struct rd *rd)); ++int rd_exec_link(struct rd *rd, int (*cb)(struct rd *rd), bool strict_port); ++void rd_free(struct rd *rd); ++int rd_set_arg_to_devname(struct rd *rd); ++int rd_argc(struct rd *rd); ++ ++int strcmpx(const char *str1, const char *str2); ++ ++/* ++ * Device manipulation ++ */ ++struct dev_map *dev_map_lookup(struct rd *rd, bool allow_port_index); ++ ++/* ++ * Filter manipulation ++ */ ++int rd_build_filter(struct rd *rd, const struct filters valid_filters[]); ++bool rd_check_is_filtered(struct rd *rd, const char *key, uint32_t val); ++bool rd_check_is_string_filtered(struct rd *rd, const char *key, const char *val); ++bool rd_check_is_key_exist(struct rd *rd, const char *key); ++/* ++ * Netlink ++ */ ++int rd_send_msg(struct rd *rd); ++int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, uint32_t seq); ++void rd_prepare_msg(struct rd *rd, uint32_t cmd, uint32_t *seq, uint16_t flags); ++int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data); ++int rd_attr_cb(const struct nlattr *attr, void *data); ++int rd_attr_check(const struct nlattr *attr, int *typep); ++ ++/* ++ * Print helpers ++ */ ++void print_driver_table(struct rd *rd, struct nlattr *tb); ++void newline(struct rd *rd); ++void newline_indent(struct rd *rd); ++#define MAX_LINE_LENGTH 80 ++ ++#endif /* _RDMA_TOOL_H_ */ +diff --git a/rdma/res.c b/rdma/res.c +new file mode 100644 +index 0000000000000..cbb2efe6c7235 +--- /dev/null ++++ b/rdma/res.c +@@ -0,0 +1,1111 @@ ++/* ++ * res.c RDMA tool ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ 
* as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Authors: Leon Romanovsky ++ */ ++ ++#include "rdma.h" ++#include ++ ++static int res_help(struct rd *rd) ++{ ++ pr_out("Usage: %s resource\n", rd->filename); ++ pr_out(" resource show [DEV]\n"); ++ pr_out(" resource show [qp|cm_id|pd|mr|cq]\n"); ++ pr_out(" resource show qp link [DEV/PORT]\n"); ++ pr_out(" resource show qp link [DEV/PORT] [FILTER-NAME FILTER-VALUE]\n"); ++ pr_out(" resource show cm_id link [DEV/PORT]\n"); ++ pr_out(" resource show cm_id link [DEV/PORT] [FILTER-NAME FILTER-VALUE]\n"); ++ pr_out(" resource show cq link [DEV/PORT]\n"); ++ pr_out(" resource show cq link [DEV/PORT] [FILTER-NAME FILTER-VALUE]\n"); ++ pr_out(" resource show pd dev [DEV]\n"); ++ pr_out(" resource show pd dev [DEV] [FILTER-NAME FILTER-VALUE]\n"); ++ pr_out(" resource show mr dev [DEV]\n"); ++ pr_out(" resource show mr dev [DEV] [FILTER-NAME FILTER-VALUE]\n"); ++ return 0; ++} ++ ++static int res_print_summary(struct rd *rd, struct nlattr **tb) ++{ ++ struct nlattr *nla_table = tb[RDMA_NLDEV_ATTR_RES_SUMMARY]; ++ struct nlattr *nla_entry; ++ const char *name; ++ uint64_t curr; ++ int err; ++ ++ mnl_attr_for_each_nested(nla_entry, nla_table) { ++ struct nlattr *nla_line[RDMA_NLDEV_ATTR_MAX] = {}; ++ char json_name[32]; ++ ++ err = mnl_attr_parse_nested(nla_entry, rd_attr_cb, nla_line); ++ if (err != MNL_CB_OK) ++ return -EINVAL; ++ ++ if (!nla_line[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] || ++ !nla_line[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]) { ++ return -EINVAL; ++ } ++ ++ name = mnl_attr_get_str(nla_line[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]); ++ curr = mnl_attr_get_u64(nla_line[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]); ++ if (rd->json_output) { ++ snprintf(json_name, 32, "%s", name); ++ jsonw_lluint_field(rd->jw, json_name, curr); ++ } else { ++ pr_out("%s %" PRIu64 " ", name, curr); /* curr is uint64_t, so use the unsigned conversion */ ++ } ++ } ++ return 0; ++} ++ ++static int
res_no_args_parse_cb(const struct nlmsghdr *nlh, void *data) ++{ ++ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {}; ++ struct rd *rd = data; ++ const char *name; ++ uint32_t idx; ++ ++ mnl_attr_parse(nlh, 0, rd_attr_cb, tb); ++ if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || ++ !tb[RDMA_NLDEV_ATTR_DEV_NAME] || ++ !tb[RDMA_NLDEV_ATTR_RES_SUMMARY]) ++ return MNL_CB_ERROR; ++ ++ idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); ++ name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]); ++ if (rd->json_output) { ++ jsonw_uint_field(rd->jw, "ifindex", idx); ++ jsonw_string_field(rd->jw, "ifname", name); ++ } else { ++ pr_out("%u: %s: ", idx, name); ++ } ++ ++ res_print_summary(rd, tb); ++ ++ if (!rd->json_output) ++ pr_out("\n"); ++ return MNL_CB_OK; ++} ++ ++static int _res_send_msg(struct rd *rd, uint32_t command, mnl_cb_t callback) ++{ ++ uint32_t flags = NLM_F_REQUEST | NLM_F_ACK; ++ uint32_t seq; ++ int ret; ++ ++ if (command != RDMA_NLDEV_CMD_RES_GET) ++ flags |= NLM_F_DUMP; ++ ++ rd_prepare_msg(rd, command, &seq, flags); ++ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx); ++ if (rd->port_idx) ++ mnl_attr_put_u32(rd->nlh, ++ RDMA_NLDEV_ATTR_PORT_INDEX, rd->port_idx); ++ ++ ret = rd_send_msg(rd); ++ if (ret) ++ return ret; ++ ++ if (rd->json_output) ++ jsonw_start_object(rd->jw); ++ ret = rd_recv_msg(rd, callback, rd, seq); ++ if (rd->json_output) ++ jsonw_end_object(rd->jw); ++ return ret; ++} ++ ++#define RES_FUNC(name, command, valid_filters, strict_port) \ ++ static int _##name(struct rd *rd)\ ++ { \ ++ return _res_send_msg(rd, command, name##_parse_cb); \ ++ } \ ++ static int name(struct rd *rd) \ ++ {\ ++ int ret = rd_build_filter(rd, valid_filters); \ ++ if (ret) \ ++ return ret; \ ++ if ((uintptr_t)valid_filters != (uintptr_t)NULL) { \ ++ ret = rd_set_arg_to_devname(rd); \ ++ if (ret) \ ++ return ret;\ ++ } \ ++ if (strict_port) \ ++ return rd_exec_dev(rd, _##name); \ ++ else \ ++ return rd_exec_link(rd, _##name, strict_port); \ ++ } ++ ++static 
const char *path_mig_to_str(uint8_t idx) ++{ ++ static const char * const path_mig_str[] = { "MIGRATED", ++ "REARM", "ARMED" }; ++ ++ if (idx < ARRAY_SIZE(path_mig_str)) ++ return path_mig_str[idx]; ++ return "UNKNOWN"; ++} ++ ++static const char *qp_states_to_str(uint8_t idx) ++{ ++ static const char * const qp_states_str[] = { "RESET", "INIT", ++ "RTR", "RTS", "SQD", ++ "SQE", "ERR" }; ++ ++ if (idx < ARRAY_SIZE(qp_states_str)) ++ return qp_states_str[idx]; ++ return "UNKNOWN"; ++} ++ ++static const char *qp_types_to_str(uint8_t idx) ++{ ++ static const char * const qp_types_str[] = { "SMI", "GSI", "RC", ++ "UC", "UD", "RAW_IPV6", ++ "RAW_ETHERTYPE", ++ "UNKNOWN", "RAW_PACKET", ++ "XRC_INI", "XRC_TGT" }; ++ ++ if (idx < ARRAY_SIZE(qp_types_str)) ++ return qp_types_str[idx]; ++ return "UNKNOWN"; ++} ++ ++static void print_lqpn(struct rd *rd, uint32_t val) ++{ ++ if (rd->json_output) ++ jsonw_uint_field(rd->jw, "lqpn", val); ++ else ++ pr_out("lqpn %u ", val); ++} ++ ++static void print_rqpn(struct rd *rd, uint32_t val, struct nlattr **nla_line) ++{ ++ if (!nla_line[RDMA_NLDEV_ATTR_RES_RQPN]) ++ return; ++ ++ if (rd->json_output) ++ jsonw_uint_field(rd->jw, "rqpn", val); ++ else ++ pr_out("rqpn %u ", val); ++} ++ ++static void print_type(struct rd *rd, uint32_t val) ++{ ++ if (rd->json_output) ++ jsonw_string_field(rd->jw, "type", ++ qp_types_to_str(val)); ++ else ++ pr_out("type %s ", qp_types_to_str(val)); ++} ++ ++static void print_state(struct rd *rd, uint32_t val) ++{ ++ if (rd->json_output) ++ jsonw_string_field(rd->jw, "state", ++ qp_states_to_str(val)); ++ else ++ pr_out("state %s ", qp_states_to_str(val)); ++} ++ ++static void print_rqpsn(struct rd *rd, uint32_t val, struct nlattr **nla_line) ++{ ++ if (!nla_line[RDMA_NLDEV_ATTR_RES_RQ_PSN]) ++ return; ++ ++ if (rd->json_output) ++ jsonw_uint_field(rd->jw, "rq-psn", val); ++ else ++ pr_out("rq-psn %u ", val); ++} ++ ++static void print_sqpsn(struct rd *rd, uint32_t val) ++{ ++ if (rd->json_output) ++ 
jsonw_uint_field(rd->jw, "sq-psn", val); ++ else ++ pr_out("sq-psn %u ", val); ++} ++ ++static void print_pathmig(struct rd *rd, uint32_t val, ++ struct nlattr **nla_line) ++{ ++ if (!nla_line[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]) ++ return; ++ ++ if (rd->json_output) ++ jsonw_string_field(rd->jw, ++ "path-mig-state", ++ path_mig_to_str(val)); ++ else ++ pr_out("path-mig-state %s ", path_mig_to_str(val)); ++} ++ ++static void print_pid(struct rd *rd, uint32_t val) ++{ ++ if (rd->json_output) ++ jsonw_uint_field(rd->jw, "pid", val); ++ else ++ pr_out("pid %u ", val); ++} ++ ++static void print_comm(struct rd *rd, const char *str, ++ struct nlattr **nla_line) ++{ ++ char tmp[18]; ++ ++ if (rd->json_output) { ++ /* Don't beautify output in JSON format */ ++ jsonw_string_field(rd->jw, "comm", str ? str : ""); ++ return; ++ } ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_PID]) ++ snprintf(tmp, sizeof(tmp), "%s", str ? str : ""); /* str is NULL when get_task_name() failed */ ++ else ++ snprintf(tmp, sizeof(tmp), "[%s]", str); ++ ++ pr_out("comm %s ", tmp); ++} ++ ++static void print_dev(struct rd *rd, uint32_t idx, const char *name) ++{ ++ if (rd->json_output) { ++ jsonw_uint_field(rd->jw, "ifindex", idx); ++ jsonw_string_field(rd->jw, "ifname", name); ++ } else { ++ pr_out("dev %s ", name); ++ } ++} ++ ++static void print_link(struct rd *rd, uint32_t idx, const char *name, ++ uint32_t port, struct nlattr **nla_line) ++{ ++ if (rd->json_output) { ++ jsonw_uint_field(rd->jw, "ifindex", idx); ++ ++ if (nla_line[RDMA_NLDEV_ATTR_PORT_INDEX]) ++ jsonw_uint_field(rd->jw, "port", port); ++ ++ jsonw_string_field(rd->jw, "ifname", name); ++ } else { ++ if (nla_line[RDMA_NLDEV_ATTR_PORT_INDEX]) ++ pr_out("link %s/%u ", name, port); ++ else ++ pr_out("link %s/- ", name); ++ } ++} ++ ++static char *get_task_name(uint32_t pid) ++{ ++ char *comm; ++ FILE *f; ++ ++ if (asprintf(&comm, "/proc/%d/comm", pid) < 0) ++ return NULL; ++ ++ f = fopen(comm, "r"); ++ free(comm); ++ if (!f) ++ return NULL; ++ ++ if (fscanf(f, "%ms\n", &comm) != 1) ++ comm = NULL; ++ ++
fclose(f); ++ ++ return comm; ++} ++ ++static int res_qp_parse_cb(const struct nlmsghdr *nlh, void *data) ++{ ++ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {}; ++ struct nlattr *nla_table, *nla_entry; ++ struct rd *rd = data; ++ const char *name; ++ uint32_t idx; ++ ++ mnl_attr_parse(nlh, 0, rd_attr_cb, tb); ++ if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || ++ !tb[RDMA_NLDEV_ATTR_DEV_NAME] || ++ !tb[RDMA_NLDEV_ATTR_RES_QP]) ++ return MNL_CB_ERROR; ++ ++ name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]); ++ idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); ++ nla_table = tb[RDMA_NLDEV_ATTR_RES_QP]; ++ ++ mnl_attr_for_each_nested(nla_entry, nla_table) { ++ struct nlattr *nla_line[RDMA_NLDEV_ATTR_MAX] = {}; ++ uint32_t lqpn, rqpn = 0, rq_psn = 0, sq_psn; ++ uint8_t type, state, path_mig_state = 0; ++ uint32_t port = 0, pid = 0; ++ char *comm = NULL; ++ int err; ++ ++ err = mnl_attr_parse_nested(nla_entry, rd_attr_cb, nla_line); ++ if (err != MNL_CB_OK) ++ return MNL_CB_ERROR; ++ ++ if (!nla_line[RDMA_NLDEV_ATTR_RES_LQPN] || ++ !nla_line[RDMA_NLDEV_ATTR_RES_SQ_PSN] || ++ !nla_line[RDMA_NLDEV_ATTR_RES_TYPE] || ++ !nla_line[RDMA_NLDEV_ATTR_RES_STATE] || ++ (!nla_line[RDMA_NLDEV_ATTR_RES_PID] && ++ !nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME])) { ++ return MNL_CB_ERROR; ++ } ++ ++ if (nla_line[RDMA_NLDEV_ATTR_PORT_INDEX]) ++ port = mnl_attr_get_u32(nla_line[RDMA_NLDEV_ATTR_PORT_INDEX]); ++ ++ if (port != rd->port_idx) ++ continue; ++ ++ lqpn = mnl_attr_get_u32(nla_line[RDMA_NLDEV_ATTR_RES_LQPN]); ++ if (rd_check_is_filtered(rd, "lqpn", lqpn)) ++ continue; ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_RQPN]) { ++ rqpn = mnl_attr_get_u32(nla_line[RDMA_NLDEV_ATTR_RES_RQPN]); ++ if (rd_check_is_filtered(rd, "rqpn", rqpn)) ++ continue; ++ } else { ++ if (rd_check_is_key_exist(rd, "rqpn")) ++ continue; ++ } ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_RQ_PSN]) { ++ rq_psn = mnl_attr_get_u32(nla_line[RDMA_NLDEV_ATTR_RES_RQ_PSN]); ++ if (rd_check_is_filtered(rd, "rq-psn", rq_psn)) ++ continue; ++ } 
else { ++ if (rd_check_is_key_exist(rd, "rq-psn")) ++ continue; ++ } ++ ++ sq_psn = mnl_attr_get_u32(nla_line[RDMA_NLDEV_ATTR_RES_SQ_PSN]); ++ if (rd_check_is_filtered(rd, "sq-psn", sq_psn)) ++ continue; ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]) { ++ path_mig_state = mnl_attr_get_u8(nla_line[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]); ++ if (rd_check_is_string_filtered(rd, "path-mig-state", path_mig_to_str(path_mig_state))) ++ continue; ++ } else { ++ if (rd_check_is_key_exist(rd, "path-mig-state")) ++ continue; ++ } ++ ++ type = mnl_attr_get_u8(nla_line[RDMA_NLDEV_ATTR_RES_TYPE]); ++ if (rd_check_is_string_filtered(rd, "type", qp_types_to_str(type))) ++ continue; ++ ++ state = mnl_attr_get_u8(nla_line[RDMA_NLDEV_ATTR_RES_STATE]); ++ if (rd_check_is_string_filtered(rd, "state", qp_states_to_str(state))) ++ continue; ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_PID]) { ++ pid = mnl_attr_get_u32(nla_line[RDMA_NLDEV_ATTR_RES_PID]); ++ comm = get_task_name(pid); ++ } ++ ++ if (rd_check_is_filtered(rd, "pid", pid)) { ++ free(comm); ++ continue; ++ } ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME]) ++ /* discard const from mnl_attr_get_str */ ++ comm = (char *)mnl_attr_get_str(nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME]); ++ ++ if (rd->json_output) ++ jsonw_start_array(rd->jw); ++ ++ print_link(rd, idx, name, port, nla_line); ++ ++ print_lqpn(rd, lqpn); ++ print_rqpn(rd, rqpn, nla_line); ++ ++ print_type(rd, type); ++ print_state(rd, state); ++ ++ print_rqpsn(rd, rq_psn, nla_line); ++ print_sqpsn(rd, sq_psn); ++ ++ print_pathmig(rd, path_mig_state, nla_line); ++ print_pid(rd, pid); ++ print_comm(rd, comm, nla_line); ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_PID]) ++ free(comm); ++ ++ print_driver_table(rd, nla_line[RDMA_NLDEV_ATTR_DRIVER]); ++ newline(rd); ++ } ++ return MNL_CB_OK; ++} ++ ++static void print_qp_type(struct rd *rd, uint32_t val) ++{ ++ if (rd->json_output) ++ jsonw_string_field(rd->jw, "qp-type", ++ qp_types_to_str(val)); ++ else ++ pr_out("qp-type %s ", 
qp_types_to_str(val)); ++} ++ ++static const char *cm_id_state_to_str(uint8_t idx) ++{ ++ static const char * const cm_id_states_str[] = { ++ "IDLE", "ADDR_QUERY", "ADDR_RESOLVED", "ROUTE_QUERY", ++ "ROUTE_RESOLVED", "CONNECT", "DISCONNECT", "ADDR_BOUND", ++ "LISTEN", "DEVICE_REMOVAL", "DESTROYING" }; ++ ++ if (idx < ARRAY_SIZE(cm_id_states_str)) ++ return cm_id_states_str[idx]; ++ return "UNKNOWN"; ++} ++ ++static const char *cm_id_ps_to_str(uint32_t ps) ++{ ++ switch (ps) { ++ case RDMA_PS_IPOIB: ++ return "IPoIB"; ++ case RDMA_PS_IB: ++ return "IB"; /* was a copy-paste of the IPoIB label above */ ++ case RDMA_PS_TCP: ++ return "TCP"; ++ case RDMA_PS_UDP: ++ return "UDP"; ++ default: ++ return "---"; ++ } ++} ++ ++static void print_cm_id_state(struct rd *rd, uint8_t state) ++{ ++ if (rd->json_output) { ++ jsonw_string_field(rd->jw, "state", cm_id_state_to_str(state)); ++ return; ++ } ++ pr_out("state %s ", cm_id_state_to_str(state)); ++} ++ ++static void print_ps(struct rd *rd, uint32_t ps) ++{ ++ if (rd->json_output) { ++ jsonw_string_field(rd->jw, "ps", cm_id_ps_to_str(ps)); ++ return; ++ } ++ pr_out("ps %s ", cm_id_ps_to_str(ps)); ++} ++ ++static void print_ipaddr(struct rd *rd, const char *key, char *addrstr, ++ uint16_t port) ++{ ++ if (rd->json_output) { ++ int name_size = INET6_ADDRSTRLEN+strlen(":65535"); ++ char json_name[name_size]; ++ ++ snprintf(json_name, name_size, "%s:%u", addrstr, port); ++ jsonw_string_field(rd->jw, key, json_name); ++ return; ++ } ++ pr_out("%s %s:%u ", key, addrstr, port); ++} ++ ++static int ss_ntop(struct nlattr *nla_line, char *addr_str, uint16_t *port) ++{ ++ struct __kernel_sockaddr_storage *addr; ++ ++ addr = (struct __kernel_sockaddr_storage *) ++ mnl_attr_get_payload(nla_line); ++ switch (addr->ss_family) { ++ case AF_INET: { ++ struct sockaddr_in *sin = (struct sockaddr_in *)addr; ++ ++ if (!inet_ntop(AF_INET, (const void *)&sin->sin_addr, addr_str, ++ INET6_ADDRSTRLEN)) ++ return -EINVAL; ++ *port = ntohs(sin->sin_port); ++ break; ++ } ++ case AF_INET6: { ++
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; ++ ++ if (!inet_ntop(AF_INET6, (const void *)&sin6->sin6_addr, ++ addr_str, INET6_ADDRSTRLEN)) ++ return -EINVAL; ++ *port = ntohs(sin6->sin6_port); ++ break; ++ } ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int res_cm_id_parse_cb(const struct nlmsghdr *nlh, void *data) ++{ ++ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {}; ++ struct nlattr *nla_table, *nla_entry; ++ struct rd *rd = data; ++ const char *name; ++ int idx; ++ ++ mnl_attr_parse(nlh, 0, rd_attr_cb, tb); ++ if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || ++ !tb[RDMA_NLDEV_ATTR_DEV_NAME] || ++ !tb[RDMA_NLDEV_ATTR_RES_CM_ID]) ++ return MNL_CB_ERROR; ++ ++ name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]); ++ idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); ++ nla_table = tb[RDMA_NLDEV_ATTR_RES_CM_ID]; ++ mnl_attr_for_each_nested(nla_entry, nla_table) { ++ struct nlattr *nla_line[RDMA_NLDEV_ATTR_MAX] = {}; ++ char src_addr_str[INET6_ADDRSTRLEN]; ++ char dst_addr_str[INET6_ADDRSTRLEN]; ++ uint16_t src_port, dst_port; ++ uint32_t port = 0, pid = 0; ++ uint8_t type = 0, state; ++ uint32_t lqpn = 0, ps; ++ char *comm = NULL; ++ int err; ++ ++ err = mnl_attr_parse_nested(nla_entry, rd_attr_cb, nla_line); ++ if (err != MNL_CB_OK) ++ return -EINVAL; ++ ++ if (!nla_line[RDMA_NLDEV_ATTR_RES_STATE] || ++ !nla_line[RDMA_NLDEV_ATTR_RES_PS] || ++ (!nla_line[RDMA_NLDEV_ATTR_RES_PID] && ++ !nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME])) { ++ return MNL_CB_ERROR; ++ } ++ ++ if (nla_line[RDMA_NLDEV_ATTR_PORT_INDEX]) ++ port = mnl_attr_get_u32( ++ nla_line[RDMA_NLDEV_ATTR_PORT_INDEX]); ++ ++ if (port && port != rd->port_idx) ++ continue; ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_LQPN]) { ++ lqpn = mnl_attr_get_u32( ++ nla_line[RDMA_NLDEV_ATTR_RES_LQPN]); ++ if (rd_check_is_filtered(rd, "lqpn", lqpn)) ++ continue; ++ } ++ if (nla_line[RDMA_NLDEV_ATTR_RES_TYPE]) { ++ type = mnl_attr_get_u8( ++ nla_line[RDMA_NLDEV_ATTR_RES_TYPE]); ++ if 
(rd_check_is_string_filtered(rd, "qp-type", ++ qp_types_to_str(type))) ++ continue; ++ } ++ ++ ps = mnl_attr_get_u32(nla_line[RDMA_NLDEV_ATTR_RES_PS]); ++ if (rd_check_is_string_filtered(rd, "ps", cm_id_ps_to_str(ps))) ++ continue; ++ ++ state = mnl_attr_get_u8(nla_line[RDMA_NLDEV_ATTR_RES_STATE]); ++ if (rd_check_is_string_filtered(rd, "state", ++ cm_id_state_to_str(state))) ++ continue; ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_SRC_ADDR]) { ++ if (ss_ntop(nla_line[RDMA_NLDEV_ATTR_RES_SRC_ADDR], ++ src_addr_str, &src_port)) ++ continue; ++ if (rd_check_is_string_filtered(rd, "src-addr", ++ src_addr_str)) ++ continue; ++ if (rd_check_is_filtered(rd, "src-port", src_port)) ++ continue; ++ } ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_DST_ADDR]) { ++ if (ss_ntop(nla_line[RDMA_NLDEV_ATTR_RES_DST_ADDR], ++ dst_addr_str, &dst_port)) ++ continue; ++ if (rd_check_is_string_filtered(rd, "dst-addr", ++ dst_addr_str)) ++ continue; ++ if (rd_check_is_filtered(rd, "dst-port", dst_port)) ++ continue; ++ } ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_PID]) { ++ pid = mnl_attr_get_u32( ++ nla_line[RDMA_NLDEV_ATTR_RES_PID]); ++ comm = get_task_name(pid); ++ } ++ ++ if (rd_check_is_filtered(rd, "pid", pid)) { ++ free(comm); ++ continue; ++ } ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME]) { ++ /* discard const from mnl_attr_get_str */ ++ comm = (char *)mnl_attr_get_str( ++ nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME]); ++ } ++ ++ if (rd->json_output) ++ jsonw_start_array(rd->jw); ++ ++ print_link(rd, idx, name, port, nla_line); ++ if (nla_line[RDMA_NLDEV_ATTR_RES_LQPN]) ++ print_lqpn(rd, lqpn); ++ if (nla_line[RDMA_NLDEV_ATTR_RES_TYPE]) ++ print_qp_type(rd, type); ++ print_cm_id_state(rd, state); ++ print_ps(rd, ps); ++ print_pid(rd, pid); ++ print_comm(rd, comm, nla_line); ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_SRC_ADDR]) ++ print_ipaddr(rd, "src-addr", src_addr_str, src_port); ++ if (nla_line[RDMA_NLDEV_ATTR_RES_DST_ADDR]) ++ print_ipaddr(rd, "dst-addr", dst_addr_str, dst_port); ++ ++ if 
(nla_line[RDMA_NLDEV_ATTR_RES_PID]) ++ free(comm); ++ ++ print_driver_table(rd, nla_line[RDMA_NLDEV_ATTR_DRIVER]); ++ newline(rd); ++ } ++ return MNL_CB_OK; ++} ++ ++static void print_cqe(struct rd *rd, uint32_t val) ++{ ++ if (rd->json_output) ++ jsonw_uint_field(rd->jw, "cqe", val); ++ else ++ pr_out("cqe %u ", val); ++} ++ ++static void print_users(struct rd *rd, uint64_t val) ++{ ++ if (rd->json_output) ++ jsonw_uint_field(rd->jw, "users", val); ++ else ++ pr_out("users %" PRIu64 " ", val); ++} ++ ++static const char *poll_ctx_to_str(uint8_t idx) ++{ ++ static const char * const cm_id_states_str[] = { ++ "DIRECT", "SOFTIRQ", "WORKQUEUE"}; ++ ++ if (idx < ARRAY_SIZE(cm_id_states_str)) ++ return cm_id_states_str[idx]; ++ return "UNKNOWN"; ++} ++ ++static void print_poll_ctx(struct rd *rd, uint8_t poll_ctx) ++{ ++ if (rd->json_output) { ++ jsonw_string_field(rd->jw, "poll-ctx", ++ poll_ctx_to_str(poll_ctx)); ++ return; ++ } ++ pr_out("poll-ctx %s ", poll_ctx_to_str(poll_ctx)); ++} ++ ++static int res_cq_parse_cb(const struct nlmsghdr *nlh, void *data) ++{ ++ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {}; ++ struct nlattr *nla_table, *nla_entry; ++ struct rd *rd = data; ++ const char *name; ++ uint32_t idx; ++ ++ mnl_attr_parse(nlh, 0, rd_attr_cb, tb); ++ if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || ++ !tb[RDMA_NLDEV_ATTR_DEV_NAME] || ++ !tb[RDMA_NLDEV_ATTR_RES_CQ]) ++ return MNL_CB_ERROR; ++ ++ name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]); ++ idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); ++ nla_table = tb[RDMA_NLDEV_ATTR_RES_CQ]; ++ ++ mnl_attr_for_each_nested(nla_entry, nla_table) { ++ struct nlattr *nla_line[RDMA_NLDEV_ATTR_MAX] = {}; ++ char *comm = NULL; ++ uint32_t pid = 0; ++ uint8_t poll_ctx = 0; ++ uint64_t users; ++ uint32_t cqe; ++ int err; ++ ++ err = mnl_attr_parse_nested(nla_entry, rd_attr_cb, nla_line); ++ if (err != MNL_CB_OK) ++ return MNL_CB_ERROR; ++ ++ if (!nla_line[RDMA_NLDEV_ATTR_RES_CQE] || ++ !nla_line[RDMA_NLDEV_ATTR_RES_USECNT] || 
++ (!nla_line[RDMA_NLDEV_ATTR_RES_PID] && ++ !nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME])) { ++ return MNL_CB_ERROR; ++ } ++ ++ cqe = mnl_attr_get_u32(nla_line[RDMA_NLDEV_ATTR_RES_CQE]); ++ ++ users = mnl_attr_get_u64(nla_line[RDMA_NLDEV_ATTR_RES_USECNT]); ++ if (rd_check_is_filtered(rd, "users", users)) ++ continue; ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_POLL_CTX]) { ++ poll_ctx = mnl_attr_get_u8( ++ nla_line[RDMA_NLDEV_ATTR_RES_POLL_CTX]); ++ if (rd_check_is_string_filtered(rd, "poll-ctx", ++ poll_ctx_to_str(poll_ctx))) ++ continue; ++ } ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_PID]) { ++ pid = mnl_attr_get_u32( ++ nla_line[RDMA_NLDEV_ATTR_RES_PID]); ++ comm = get_task_name(pid); ++ } ++ ++ if (rd_check_is_filtered(rd, "pid", pid)) { ++ free(comm); ++ continue; ++ } ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME]) ++ /* discard const from mnl_attr_get_str */ ++ comm = (char *)mnl_attr_get_str( ++ nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME]); ++ ++ if (rd->json_output) ++ jsonw_start_array(rd->jw); ++ ++ print_dev(rd, idx, name); ++ print_cqe(rd, cqe); ++ print_users(rd, users); ++ if (nla_line[RDMA_NLDEV_ATTR_RES_POLL_CTX]) ++ print_poll_ctx(rd, poll_ctx); ++ print_pid(rd, pid); ++ print_comm(rd, comm, nla_line); ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_PID]) ++ free(comm); ++ ++ print_driver_table(rd, nla_line[RDMA_NLDEV_ATTR_DRIVER]); ++ newline(rd); ++ } ++ return MNL_CB_OK; ++} ++ ++static void print_key(struct rd *rd, const char *name, uint32_t val) ++{ ++ if (rd->json_output) ++ jsonw_xint_field(rd->jw, name, val); ++ else ++ pr_out("%s 0x%x ", name, val); ++} ++ ++static void print_iova(struct rd *rd, uint64_t val) ++{ ++ if (rd->json_output) ++ jsonw_xint_field(rd->jw, "iova", val); ++ else ++ pr_out("iova 0x%" PRIx64 " ", val); ++} ++ ++static void print_mrlen(struct rd *rd, uint64_t val) ++{ ++ if (rd->json_output) ++ jsonw_uint_field(rd->jw, "mrlen", val); ++ else ++ pr_out("mrlen %" PRIu64 " ", val); ++} ++ ++static int res_mr_parse_cb(const struct nlmsghdr 
*nlh, void *data) ++{ ++ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {}; ++ struct nlattr *nla_table, *nla_entry; ++ struct rd *rd = data; ++ const char *name; ++ uint32_t idx; ++ ++ mnl_attr_parse(nlh, 0, rd_attr_cb, tb); ++ if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || ++ !tb[RDMA_NLDEV_ATTR_DEV_NAME] || ++ !tb[RDMA_NLDEV_ATTR_RES_MR]) ++ return MNL_CB_ERROR; ++ ++ name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]); ++ idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); ++ nla_table = tb[RDMA_NLDEV_ATTR_RES_MR]; ++ ++ mnl_attr_for_each_nested(nla_entry, nla_table) { ++ struct nlattr *nla_line[RDMA_NLDEV_ATTR_MAX] = {}; ++ uint32_t rkey = 0, lkey = 0; ++ uint64_t iova = 0, mrlen; ++ char *comm = NULL; ++ uint32_t pid = 0; ++ int err; ++ ++ err = mnl_attr_parse_nested(nla_entry, rd_attr_cb, nla_line); ++ if (err != MNL_CB_OK) ++ return MNL_CB_ERROR; ++ ++ if (!nla_line[RDMA_NLDEV_ATTR_RES_MRLEN] || ++ (!nla_line[RDMA_NLDEV_ATTR_RES_PID] && ++ !nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME])) { ++ return MNL_CB_ERROR; ++ } ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_RKEY]) ++ rkey = mnl_attr_get_u32( ++ nla_line[RDMA_NLDEV_ATTR_RES_RKEY]); ++ if (nla_line[RDMA_NLDEV_ATTR_RES_LKEY]) ++ lkey = mnl_attr_get_u32( ++ nla_line[RDMA_NLDEV_ATTR_RES_LKEY]); ++ if (nla_line[RDMA_NLDEV_ATTR_RES_IOVA]) ++ iova = mnl_attr_get_u64( ++ nla_line[RDMA_NLDEV_ATTR_RES_IOVA]); ++ ++ mrlen = mnl_attr_get_u64(nla_line[RDMA_NLDEV_ATTR_RES_MRLEN]); ++ if (rd_check_is_filtered(rd, "mrlen", mrlen)) ++ continue; ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_PID]) { ++ pid = mnl_attr_get_u32( ++ nla_line[RDMA_NLDEV_ATTR_RES_PID]); ++ comm = get_task_name(pid); ++ } ++ ++ if (rd_check_is_filtered(rd, "pid", pid)) { ++ free(comm); ++ continue; ++ } ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME]) ++ /* discard const from mnl_attr_get_str */ ++ comm = (char *)mnl_attr_get_str( ++ nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME]); ++ ++ if (rd->json_output) ++ jsonw_start_array(rd->jw); ++ ++ print_dev(rd, idx, name); ++ if 
(nla_line[RDMA_NLDEV_ATTR_RES_RKEY]) ++ print_key(rd, "rkey", rkey); ++ if (nla_line[RDMA_NLDEV_ATTR_RES_LKEY]) ++ print_key(rd, "lkey", lkey); ++ if (nla_line[RDMA_NLDEV_ATTR_RES_IOVA]) ++ print_iova(rd, iova); ++ print_mrlen(rd, mrlen); ++ print_pid(rd, pid); ++ print_comm(rd, comm, nla_line); ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_PID]) ++ free(comm); ++ ++ print_driver_table(rd, nla_line[RDMA_NLDEV_ATTR_DRIVER]); ++ newline(rd); ++ } ++ return MNL_CB_OK; ++} ++ ++static int res_pd_parse_cb(const struct nlmsghdr *nlh, void *data) ++{ ++ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {}; ++ struct nlattr *nla_table, *nla_entry; ++ struct rd *rd = data; ++ const char *name; ++ uint32_t idx; ++ ++ mnl_attr_parse(nlh, 0, rd_attr_cb, tb); ++ if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || ++ !tb[RDMA_NLDEV_ATTR_DEV_NAME] || ++ !tb[RDMA_NLDEV_ATTR_RES_PD]) ++ return MNL_CB_ERROR; ++ ++ name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]); ++ idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); ++ nla_table = tb[RDMA_NLDEV_ATTR_RES_PD]; ++ ++ mnl_attr_for_each_nested(nla_entry, nla_table) { ++ uint32_t local_dma_lkey = 0, unsafe_global_rkey = 0; ++ struct nlattr *nla_line[RDMA_NLDEV_ATTR_MAX] = {}; ++ char *comm = NULL; ++ uint32_t pid = 0; ++ uint64_t users; ++ int err; ++ ++ err = mnl_attr_parse_nested(nla_entry, rd_attr_cb, nla_line); ++ if (err != MNL_CB_OK) ++ return MNL_CB_ERROR; ++ ++ if (!nla_line[RDMA_NLDEV_ATTR_RES_USECNT] || ++ (!nla_line[RDMA_NLDEV_ATTR_RES_PID] && ++ !nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME])) { ++ return MNL_CB_ERROR; ++ } ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]) ++ local_dma_lkey = mnl_attr_get_u32( ++ nla_line[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]); ++ ++ users = mnl_attr_get_u64(nla_line[RDMA_NLDEV_ATTR_RES_USECNT]); ++ if (rd_check_is_filtered(rd, "users", users)) ++ continue; ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]) ++ unsafe_global_rkey = mnl_attr_get_u32( ++ nla_line[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]); ++ ++ 
if (nla_line[RDMA_NLDEV_ATTR_RES_PID]) { ++ pid = mnl_attr_get_u32( ++ nla_line[RDMA_NLDEV_ATTR_RES_PID]); ++ comm = get_task_name(pid); ++ } ++ ++ if (rd_check_is_filtered(rd, "pid", pid)) ++ continue; ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME]) ++ /* discard const from mnl_attr_get_str */ ++ comm = (char *)mnl_attr_get_str( ++ nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME]); ++ ++ if (rd->json_output) ++ jsonw_start_array(rd->jw); ++ ++ print_dev(rd, idx, name); ++ if (nla_line[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]) ++ print_key(rd, "local_dma_lkey", local_dma_lkey); ++ print_users(rd, users); ++ if (nla_line[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]) ++ print_key(rd, "unsafe_global_rkey", unsafe_global_rkey); ++ print_pid(rd, pid); ++ print_comm(rd, comm, nla_line); ++ ++ if (nla_line[RDMA_NLDEV_ATTR_RES_PID]) ++ free(comm); ++ ++ print_driver_table(rd, nla_line[RDMA_NLDEV_ATTR_DRIVER]); ++ newline(rd); ++ } ++ return MNL_CB_OK; ++} ++ ++RES_FUNC(res_no_args, RDMA_NLDEV_CMD_RES_GET, NULL, true); ++ ++static const struct ++filters qp_valid_filters[MAX_NUMBER_OF_FILTERS] = {{ .name = "link", ++ .is_number = false }, ++ { .name = "lqpn", ++ .is_number = true }, ++ { .name = "rqpn", ++ .is_number = true }, ++ { .name = "pid", ++ .is_number = true }, ++ { .name = "sq-psn", ++ .is_number = true }, ++ { .name = "rq-psn", ++ .is_number = true }, ++ { .name = "type", ++ .is_number = false }, ++ { .name = "path-mig-state", ++ .is_number = false }, ++ { .name = "state", ++ .is_number = false } }; ++ ++RES_FUNC(res_qp, RDMA_NLDEV_CMD_RES_QP_GET, qp_valid_filters, false); ++ ++static const ++struct filters cm_id_valid_filters[MAX_NUMBER_OF_FILTERS] = { ++ { .name = "link", .is_number = false }, ++ { .name = "lqpn", .is_number = true }, ++ { .name = "qp-type", .is_number = false }, ++ { .name = "state", .is_number = false }, ++ { .name = "ps", .is_number = false }, ++ { .name = "dev-type", .is_number = false }, ++ { .name = "transport-type", .is_number = false }, ++ { .name = 
"pid", .is_number = true }, ++ { .name = "src-addr", .is_number = false }, ++ { .name = "src-port", .is_number = true }, ++ { .name = "dst-addr", .is_number = false }, ++ { .name = "dst-port", .is_number = true } ++}; ++ ++RES_FUNC(res_cm_id, RDMA_NLDEV_CMD_RES_CM_ID_GET, cm_id_valid_filters, false); ++ ++static const ++struct filters cq_valid_filters[MAX_NUMBER_OF_FILTERS] = { ++ { .name = "dev", .is_number = false }, ++ { .name = "users", .is_number = true }, ++ { .name = "poll-ctx", .is_number = false }, ++ { .name = "pid", .is_number = true } ++}; ++ ++RES_FUNC(res_cq, RDMA_NLDEV_CMD_RES_CQ_GET, cq_valid_filters, true); ++ ++static const ++struct filters mr_valid_filters[MAX_NUMBER_OF_FILTERS] = { ++ { .name = "dev", .is_number = false }, ++ { .name = "rkey", .is_number = true }, ++ { .name = "lkey", .is_number = true }, ++ { .name = "mrlen", .is_number = true }, ++ { .name = "pid", .is_number = true } ++}; ++ ++RES_FUNC(res_mr, RDMA_NLDEV_CMD_RES_MR_GET, mr_valid_filters, true); ++ ++static const ++struct filters pd_valid_filters[MAX_NUMBER_OF_FILTERS] = { ++ { .name = "dev", .is_number = false }, ++ { .name = "users", .is_number = true }, ++ { .name = "pid", .is_number = true } ++}; ++ ++RES_FUNC(res_pd, RDMA_NLDEV_CMD_RES_PD_GET, pd_valid_filters, true); ++ ++static int res_show(struct rd *rd) ++{ ++ const struct rd_cmd cmds[] = { ++ { NULL, res_no_args }, ++ { "qp", res_qp }, ++ { "cm_id", res_cm_id }, ++ { "cq", res_cq }, ++ { "mr", res_mr }, ++ { "pd", res_pd }, ++ { 0 } ++ }; ++ ++ /* ++ * Special case to support "rdma res show DEV_NAME" ++ */ ++ if (rd_argc(rd) == 1 && dev_map_lookup(rd, false)) ++ return rd_exec_dev(rd, _res_no_args); ++ ++ return rd_exec_cmd(rd, cmds, "parameter"); ++} ++ ++int cmd_res(struct rd *rd) ++{ ++ const struct rd_cmd cmds[] = { ++ { NULL, res_show }, ++ { "show", res_show }, ++ { "list", res_show }, ++ { "help", res_help }, ++ { 0 } ++ }; ++ ++ return rd_exec_cmd(rd, cmds, "resource command"); ++} +diff --git a/rdma/utils.c 
b/rdma/utils.c +new file mode 100644 +index 0000000000000..069d44fece101 +--- /dev/null ++++ b/rdma/utils.c +@@ -0,0 +1,868 @@ ++/* ++ * utils.c RDMA tool ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Authors: Leon Romanovsky ++ */ ++ ++#include "rdma.h" ++#include ++#include ++ ++int rd_argc(struct rd *rd) ++{ ++ return rd->argc; ++} ++ ++char *rd_argv(struct rd *rd) ++{ ++ if (!rd_argc(rd)) ++ return NULL; ++ return *rd->argv; ++} ++ ++int strcmpx(const char *str1, const char *str2) ++{ ++ if (strlen(str1) > strlen(str2)) ++ return -1; ++ return strncmp(str1, str2, strlen(str1)); ++} ++ ++static bool rd_argv_match(struct rd *rd, const char *pattern) ++{ ++ if (!rd_argc(rd)) ++ return false; ++ return strcmpx(rd_argv(rd), pattern) == 0; ++} ++ ++void rd_arg_inc(struct rd *rd) ++{ ++ if (!rd_argc(rd)) ++ return; ++ rd->argc--; ++ rd->argv++; ++} ++ ++bool rd_no_arg(struct rd *rd) ++{ ++ return rd_argc(rd) == 0; ++} ++ ++/* ++ * Possible input:output ++ * dev/port | first port | is_dump_all ++ * mlx5_1 | 0 | true ++ * mlx5_1/ | 0 | true ++ * mlx5_1/0 | 0 | false ++ * mlx5_1/1 | 1 | false ++ * mlx5_1/- | 0 | false ++ * ++ * In strict mode, /- will return error. 
++ */ ++static int get_port_from_argv(struct rd *rd, uint32_t *port, ++ bool *is_dump_all, bool strict_port) ++{ ++ char *slash; ++ ++ *port = 0; ++ *is_dump_all = true; ++ ++ slash = strchr(rd_argv(rd), '/'); ++ /* if no port found, return 0 */ ++ if (slash++) { ++ if (*slash == '-') { ++ if (strict_port) ++ return -EINVAL; ++ *is_dump_all = false; ++ return 0; ++ } ++ ++ if (isdigit(*slash)) { ++ *is_dump_all = false; ++ *port = atoi(slash); ++ } ++ if (!*port && strlen(slash)) ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static struct dev_map *dev_map_alloc(const char *dev_name) ++{ ++ struct dev_map *dev_map; ++ ++ dev_map = calloc(1, sizeof(*dev_map)); ++ if (!dev_map) ++ return NULL; ++ dev_map->dev_name = strdup(dev_name); ++ if (!dev_map->dev_name) { ++ free(dev_map); ++ return NULL; ++ } ++ ++ return dev_map; ++} ++ ++static void dev_map_cleanup(struct rd *rd) ++{ ++ struct dev_map *dev_map, *tmp; ++ ++ list_for_each_entry_safe(dev_map, tmp, ++ &rd->dev_map_list, list) { ++ list_del(&dev_map->list); ++ free(dev_map->dev_name); ++ free(dev_map); ++ } ++} ++ ++static int add_filter(struct rd *rd, char *key, char *value, ++ const struct filters valid_filters[]) ++{ ++ char cset[] = "1234567890,-"; ++ struct filter_entry *fe; ++ bool key_found = false; ++ int idx = 0; ++ int ret; ++ ++ fe = calloc(1, sizeof(*fe)); ++ if (!fe) ++ return -ENOMEM; ++ ++ while (idx < MAX_NUMBER_OF_FILTERS && valid_filters[idx].name) { ++ if (!strcmpx(key, valid_filters[idx].name)) { ++ key_found = true; ++ break; ++ } ++ idx++; ++ } ++ if (!key_found) { ++ pr_err("Unsupported filter option: %s\n", key); ++ ret = -EINVAL; ++ goto err; ++ } ++ ++ /* ++ * Check the filter validity, not optimal, but works ++ * ++ * Actually, there are three types of filters ++ * numeric - for example PID or QPN ++ * string - for example states ++ * link - user requested to filter on specific link ++ * e.g. mlx5_1/1, mlx5_1/-, mlx5_1 ... 
++ */ ++ if (valid_filters[idx].is_number && ++ strspn(value, cset) != strlen(value)) { ++ pr_err("%s filter accepts \"%s\" characters only\n", key, cset); ++ ret = -EINVAL; ++ goto err; ++ } ++ ++ fe->key = strdup(key); ++ fe->value = strdup(value); ++ if (!fe->key || !fe->value) { ++ ret = -ENOMEM; ++ goto err_alloc; ++ } ++ ++ for (idx = 0; idx < strlen(fe->value); idx++) ++ fe->value[idx] = tolower(fe->value[idx]); ++ ++ list_add_tail(&fe->list, &rd->filter_list); ++ return 0; ++ ++err_alloc: ++ free(fe->value); ++ free(fe->key); ++err: ++ free(fe); ++ return ret; ++} ++ ++int rd_build_filter(struct rd *rd, const struct filters valid_filters[]) ++{ ++ int ret = 0; ++ int idx = 0; ++ ++ if (!valid_filters || !rd_argc(rd)) ++ goto out; ++ ++ if (rd_argc(rd) == 1) { ++ pr_err("No filter data was supplied to filter option %s\n", rd_argv(rd)); ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (rd_argc(rd) % 2) { ++ pr_err("There is filter option without data\n"); ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ while (idx != rd_argc(rd)) { ++ /* ++ * We can do micro-optimization and skip "dev" ++ * and "link" filters, but it is not worth of it. 
++ */ ++ ret = add_filter(rd, *(rd->argv + idx), ++ *(rd->argv + idx + 1), valid_filters); ++ if (ret) ++ goto out; ++ idx += 2; ++ } ++ ++out: ++ return ret; ++} ++ ++bool rd_check_is_key_exist(struct rd *rd, const char *key) ++{ ++ struct filter_entry *fe; ++ ++ list_for_each_entry(fe, &rd->filter_list, list) { ++ if (!strcmpx(fe->key, key)) ++ return true; ++ } ++ ++ return false; ++} ++ ++/* ++ * Check if string entry is filtered: ++ * * key doesn't exist -> user didn't request -> not filtered ++ */ ++bool rd_check_is_string_filtered(struct rd *rd, ++ const char *key, const char *val) ++{ ++ bool key_is_filtered = false; ++ struct filter_entry *fe; ++ char *p = NULL; ++ char *str; ++ ++ list_for_each_entry(fe, &rd->filter_list, list) { ++ if (!strcmpx(fe->key, key)) { ++ /* We found the key */ ++ p = strdup(fe->value); ++ key_is_filtered = true; ++ if (!p) { ++ /* ++ * Something extremely wrong if we fail ++ * to allocate small amount of bytes. ++ */ ++ pr_err("Found key, but failed to allocate memory to store value\n"); ++ return key_is_filtered; ++ } ++ ++ /* ++ * Need to check if value in range ++ * It can come in the following formats ++ * and their permutations: ++ * str ++ * str1,str2 ++ */ ++ str = strtok(p, ","); ++ while (str) { ++ if (strlen(str) == strlen(val) && ++ !strcasecmp(str, val)) { ++ key_is_filtered = false; ++ goto out; ++ } ++ str = strtok(NULL, ","); ++ } ++ goto out; ++ } ++ } ++ ++out: ++ free(p); ++ return key_is_filtered; ++} ++ ++/* ++ * Check if key is filtered: ++ * key doesn't exist -> user didn't request -> not filtered ++ */ ++bool rd_check_is_filtered(struct rd *rd, const char *key, uint32_t val) ++{ ++ bool key_is_filtered = false; ++ struct filter_entry *fe; ++ ++ list_for_each_entry(fe, &rd->filter_list, list) { ++ uint32_t left_val = 0, fe_value = 0; ++ bool range_check = false; ++ char *p = fe->value; ++ ++ if (!strcmpx(fe->key, key)) { ++ /* We found the key */ ++ key_is_filtered = true; ++ /* ++ * Need to check if value 
in range ++ * It can come in the following formats ++ * (and their permutations): ++ * numb ++ * numb1,numb2 ++ * ,numb1,numb2 ++ * numb1-numb2 ++ * numb1,numb2-numb3,numb4-numb5 ++ */ ++ while (*p) { ++ if (isdigit(*p)) { ++ fe_value = strtol(p, &p, 10); ++ if (fe_value == val || ++ (range_check && left_val < val && ++ val < fe_value)) { ++ key_is_filtered = false; ++ goto out; ++ } ++ range_check = false; ++ } else { ++ if (*p == '-') { ++ left_val = fe_value; ++ range_check = true; ++ } ++ p++; ++ } ++ } ++ goto out; ++ } ++ } ++ ++out: ++ return key_is_filtered; ++} ++ ++static void filters_cleanup(struct rd *rd) ++{ ++ struct filter_entry *fe, *tmp; ++ ++ list_for_each_entry_safe(fe, tmp, ++ &rd->filter_list, list) { ++ list_del(&fe->list); ++ free(fe->key); ++ free(fe->value); ++ free(fe); ++ } ++} ++ ++static const enum mnl_attr_data_type nldev_policy[RDMA_NLDEV_ATTR_MAX] = { ++ [RDMA_NLDEV_ATTR_DEV_INDEX] = MNL_TYPE_U32, ++ [RDMA_NLDEV_ATTR_DEV_NAME] = MNL_TYPE_NUL_STRING, ++ [RDMA_NLDEV_ATTR_PORT_INDEX] = MNL_TYPE_U32, ++ [RDMA_NLDEV_ATTR_CAP_FLAGS] = MNL_TYPE_U64, ++ [RDMA_NLDEV_ATTR_FW_VERSION] = MNL_TYPE_NUL_STRING, ++ [RDMA_NLDEV_ATTR_NODE_GUID] = MNL_TYPE_U64, ++ [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = MNL_TYPE_U64, ++ [RDMA_NLDEV_ATTR_LID] = MNL_TYPE_U32, ++ [RDMA_NLDEV_ATTR_SM_LID] = MNL_TYPE_U32, ++ [RDMA_NLDEV_ATTR_LMC] = MNL_TYPE_U8, ++ [RDMA_NLDEV_ATTR_PORT_STATE] = MNL_TYPE_U8, ++ [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = MNL_TYPE_U8, ++ [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = MNL_TYPE_U8, ++ [RDMA_NLDEV_ATTR_RES_SUMMARY] = MNL_TYPE_NESTED, ++ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = MNL_TYPE_NESTED, ++ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = MNL_TYPE_NUL_STRING, ++ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = MNL_TYPE_U64, ++ [RDMA_NLDEV_ATTR_RES_QP] = MNL_TYPE_NESTED, ++ [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = MNL_TYPE_NESTED, ++ [RDMA_NLDEV_ATTR_RES_LQPN] = MNL_TYPE_U32, ++ [RDMA_NLDEV_ATTR_RES_RQPN] = MNL_TYPE_U32, ++ [RDMA_NLDEV_ATTR_RES_RQ_PSN] = MNL_TYPE_U32, ++ 
[RDMA_NLDEV_ATTR_RES_SQ_PSN] = MNL_TYPE_U32, ++ [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = MNL_TYPE_U8, ++ [RDMA_NLDEV_ATTR_RES_TYPE] = MNL_TYPE_U8, ++ [RDMA_NLDEV_ATTR_RES_STATE] = MNL_TYPE_U8, ++ [RDMA_NLDEV_ATTR_RES_PID] = MNL_TYPE_U32, ++ [RDMA_NLDEV_ATTR_RES_KERN_NAME] = MNL_TYPE_NUL_STRING, ++ [RDMA_NLDEV_ATTR_RES_CM_ID] = MNL_TYPE_NESTED, ++ [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = MNL_TYPE_NESTED, ++ [RDMA_NLDEV_ATTR_RES_PS] = MNL_TYPE_U32, ++ [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = MNL_TYPE_UNSPEC, ++ [RDMA_NLDEV_ATTR_RES_DST_ADDR] = MNL_TYPE_UNSPEC, ++ [RDMA_NLDEV_ATTR_RES_CQ] = MNL_TYPE_NESTED, ++ [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = MNL_TYPE_NESTED, ++ [RDMA_NLDEV_ATTR_RES_CQE] = MNL_TYPE_U32, ++ [RDMA_NLDEV_ATTR_RES_USECNT] = MNL_TYPE_U64, ++ [RDMA_NLDEV_ATTR_RES_POLL_CTX] = MNL_TYPE_U8, ++ [RDMA_NLDEV_ATTR_RES_MR] = MNL_TYPE_NESTED, ++ [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = MNL_TYPE_NESTED, ++ [RDMA_NLDEV_ATTR_RES_RKEY] = MNL_TYPE_U32, ++ [RDMA_NLDEV_ATTR_RES_LKEY] = MNL_TYPE_U32, ++ [RDMA_NLDEV_ATTR_RES_IOVA] = MNL_TYPE_U64, ++ [RDMA_NLDEV_ATTR_RES_MRLEN] = MNL_TYPE_U64, ++ [RDMA_NLDEV_ATTR_NDEV_INDEX] = MNL_TYPE_U32, ++ [RDMA_NLDEV_ATTR_NDEV_NAME] = MNL_TYPE_NUL_STRING, ++ [RDMA_NLDEV_ATTR_DRIVER] = MNL_TYPE_NESTED, ++ [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = MNL_TYPE_NESTED, ++ [RDMA_NLDEV_ATTR_DRIVER_STRING] = MNL_TYPE_NUL_STRING, ++ [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = MNL_TYPE_U8, ++ [RDMA_NLDEV_ATTR_DRIVER_S32] = MNL_TYPE_U32, ++ [RDMA_NLDEV_ATTR_DRIVER_U32] = MNL_TYPE_U32, ++ [RDMA_NLDEV_ATTR_DRIVER_S64] = MNL_TYPE_U64, ++ [RDMA_NLDEV_ATTR_DRIVER_U64] = MNL_TYPE_U64, ++}; ++ ++int rd_attr_check(const struct nlattr *attr, int *typep) ++{ ++ int type; ++ ++ if (mnl_attr_type_valid(attr, RDMA_NLDEV_ATTR_MAX) < 0) ++ return MNL_CB_ERROR; ++ ++ type = mnl_attr_get_type(attr); ++ ++ if (mnl_attr_validate(attr, nldev_policy[type]) < 0) ++ return MNL_CB_ERROR; ++ ++ *typep = nldev_policy[type]; ++ return MNL_CB_OK; ++} ++ ++int rd_attr_cb(const struct nlattr *attr, void *data) ++{ ++ 
const struct nlattr **tb = data; ++ int type; ++ ++ if (mnl_attr_type_valid(attr, RDMA_NLDEV_ATTR_MAX - 1) < 0) ++ /* We received unknown attribute */ ++ return MNL_CB_OK; ++ ++ type = mnl_attr_get_type(attr); ++ ++ if (mnl_attr_validate(attr, nldev_policy[type]) < 0) ++ return MNL_CB_ERROR; ++ ++ tb[type] = attr; ++ return MNL_CB_OK; ++} ++ ++int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data) ++{ ++ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {}; ++ struct dev_map *dev_map; ++ struct rd *rd = data; ++ const char *dev_name; ++ ++ mnl_attr_parse(nlh, 0, rd_attr_cb, tb); ++ if (!tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) ++ return MNL_CB_ERROR; ++ if (!tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { ++ pr_err("This tool doesn't support switches yet\n"); ++ return MNL_CB_ERROR; ++ } ++ ++ dev_name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]); ++ ++ dev_map = dev_map_alloc(dev_name); ++ if (!dev_map) ++ /* The main function will cleanup the allocations */ ++ return MNL_CB_ERROR; ++ list_add_tail(&dev_map->list, &rd->dev_map_list); ++ ++ dev_map->num_ports = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); ++ dev_map->idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); ++ return MNL_CB_OK; ++} ++ ++void rd_free(struct rd *rd) ++{ ++ if (!rd) ++ return; ++ free(rd->buff); ++ dev_map_cleanup(rd); ++ filters_cleanup(rd); ++} ++ ++int rd_set_arg_to_devname(struct rd *rd) ++{ ++ int ret = 0; ++ ++ while (!rd_no_arg(rd)) { ++ if (rd_argv_match(rd, "dev") || rd_argv_match(rd, "link")) { ++ rd_arg_inc(rd); ++ if (rd_no_arg(rd)) { ++ pr_err("No device name was supplied\n"); ++ ret = -EINVAL; ++ } ++ goto out; ++ } ++ rd_arg_inc(rd); ++ } ++out: ++ return ret; ++} ++ ++int rd_exec_link(struct rd *rd, int (*cb)(struct rd *rd), bool strict_port) ++{ ++ struct dev_map *dev_map; ++ uint32_t port; ++ int ret = 0; ++ ++ if (rd->json_output) ++ jsonw_start_array(rd->jw); ++ if (rd_no_arg(rd)) { ++ list_for_each_entry(dev_map, &rd->dev_map_list, list) { ++ rd->dev_idx 
= dev_map->idx; ++ port = (strict_port) ? 1 : 0; ++ for (; port < dev_map->num_ports + 1; port++) { ++ rd->port_idx = port; ++ ret = cb(rd); ++ if (ret) ++ goto out; ++ } ++ } ++ ++ } else { ++ bool is_dump_all; ++ ++ dev_map = dev_map_lookup(rd, true); ++ ret = get_port_from_argv(rd, &port, &is_dump_all, strict_port); ++ if (!dev_map || port > dev_map->num_ports || (!port && ret)) { ++ pr_err("Wrong device name\n"); ++ ret = -ENOENT; ++ goto out; ++ } ++ rd_arg_inc(rd); ++ rd->dev_idx = dev_map->idx; ++ rd->port_idx = port; ++ for (; rd->port_idx < dev_map->num_ports + 1; rd->port_idx++) { ++ ret = cb(rd); ++ if (ret) ++ goto out; ++ if (!is_dump_all) ++ /* ++ * We got request to show link for devname ++ * with port index. ++ */ ++ break; ++ } ++ } ++ ++out: ++ if (rd->json_output) ++ jsonw_end_array(rd->jw); ++ return ret; ++} ++ ++int rd_exec_dev(struct rd *rd, int (*cb)(struct rd *rd)) ++{ ++ struct dev_map *dev_map; ++ int ret = 0; ++ ++ if (rd->json_output) ++ jsonw_start_array(rd->jw); ++ if (rd_no_arg(rd)) { ++ list_for_each_entry(dev_map, &rd->dev_map_list, list) { ++ rd->dev_idx = dev_map->idx; ++ ret = cb(rd); ++ if (ret) ++ goto out; ++ } ++ } else { ++ dev_map = dev_map_lookup(rd, false); ++ if (!dev_map) { ++ pr_err("Wrong device name - %s\n", rd_argv(rd)); ++ ret = -ENOENT; ++ goto out; ++ } ++ rd_arg_inc(rd); ++ rd->dev_idx = dev_map->idx; ++ ret = cb(rd); ++ } ++out: ++ if (rd->json_output) ++ jsonw_end_array(rd->jw); ++ return ret; ++} ++ ++int rd_exec_require_dev(struct rd *rd, int (*cb)(struct rd *rd)) ++{ ++ if (rd_no_arg(rd)) { ++ pr_err("Please provide device name.\n"); ++ return -EINVAL; ++ } ++ ++ return rd_exec_dev(rd, cb); ++} ++ ++int rd_exec_cmd(struct rd *rd, const struct rd_cmd *cmds, const char *str) ++{ ++ const struct rd_cmd *c; ++ ++ /* First argument in objs table is default variant */ ++ if (rd_no_arg(rd)) ++ return cmds->func(rd); ++ ++ for (c = cmds + 1; c->cmd; ++c) { ++ if (rd_argv_match(rd, c->cmd)) { ++ /* Move to next 
argument */ ++ rd_arg_inc(rd); ++ return c->func(rd); ++ } ++ } ++ ++ pr_err("Unknown %s '%s'.\n", str, rd_argv(rd)); ++ return 0; ++} ++ ++void rd_prepare_msg(struct rd *rd, uint32_t cmd, uint32_t *seq, uint16_t flags) ++{ ++ *seq = time(NULL); ++ ++ rd->nlh = mnl_nlmsg_put_header(rd->buff); ++ rd->nlh->nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, cmd); ++ rd->nlh->nlmsg_seq = *seq; ++ rd->nlh->nlmsg_flags = flags; ++} ++ ++int rd_send_msg(struct rd *rd) ++{ ++ int ret; ++ ++ rd->nl = mnl_socket_open(NETLINK_RDMA); ++ if (!rd->nl) { ++ pr_err("Failed to open NETLINK_RDMA socket\n"); ++ return -ENODEV; ++ } ++ ++ ret = mnl_socket_bind(rd->nl, 0, MNL_SOCKET_AUTOPID); ++ if (ret < 0) { ++ pr_err("Failed to bind socket with err %d\n", ret); ++ goto err; ++ } ++ ++ ret = mnl_socket_sendto(rd->nl, rd->nlh, rd->nlh->nlmsg_len); ++ if (ret < 0) { ++ pr_err("Failed to send to socket with err %d\n", ret); ++ goto err; ++ } ++ return 0; ++ ++err: ++ mnl_socket_close(rd->nl); ++ return ret; ++} ++ ++int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, unsigned int seq) ++{ ++ int ret; ++ unsigned int portid; ++ char buf[MNL_SOCKET_BUFFER_SIZE]; ++ ++ portid = mnl_socket_get_portid(rd->nl); ++ do { ++ ret = mnl_socket_recvfrom(rd->nl, buf, sizeof(buf)); ++ if (ret <= 0) ++ break; ++ ++ ret = mnl_cb_run(buf, ret, seq, portid, callback, data); ++ } while (ret > 0); ++ ++ mnl_socket_close(rd->nl); ++ return ret; ++} ++ ++static struct dev_map *_dev_map_lookup(struct rd *rd, const char *dev_name) ++{ ++ struct dev_map *dev_map; ++ ++ list_for_each_entry(dev_map, &rd->dev_map_list, list) ++ if (strcmp(dev_name, dev_map->dev_name) == 0) ++ return dev_map; ++ ++ return NULL; ++} ++ ++struct dev_map *dev_map_lookup(struct rd *rd, bool allow_port_index) ++{ ++ struct dev_map *dev_map; ++ char *dev_name; ++ char *slash; ++ ++ if (rd_no_arg(rd)) ++ return NULL; ++ ++ dev_name = strdup(rd_argv(rd)); ++ if (allow_port_index) { ++ slash = strrchr(dev_name, '/'); ++ if (slash) ++ 
*slash = '\0'; ++ } ++ ++ dev_map = _dev_map_lookup(rd, dev_name); ++ free(dev_name); ++ return dev_map; ++} ++ ++#define nla_type(attr) ((attr)->nla_type & NLA_TYPE_MASK) ++ ++void newline(struct rd *rd) ++{ ++ if (rd->json_output) ++ jsonw_end_array(rd->jw); ++ else ++ pr_out("\n"); ++} ++ ++void newline_indent(struct rd *rd) ++{ ++ newline(rd); ++ if (!rd->json_output) ++ pr_out(" "); ++} ++ ++static int print_driver_string(struct rd *rd, const char *key_str, ++ const char *val_str) ++{ ++ if (rd->json_output) { ++ jsonw_string_field(rd->jw, key_str, val_str); ++ return 0; ++ } else { ++ return pr_out("%s %s ", key_str, val_str); ++ } ++} ++ ++static int print_driver_s32(struct rd *rd, const char *key_str, int32_t val, ++ enum rdma_nldev_print_type print_type) ++{ ++ if (rd->json_output) { ++ jsonw_int_field(rd->jw, key_str, val); ++ return 0; ++ } ++ switch (print_type) { ++ case RDMA_NLDEV_PRINT_TYPE_UNSPEC: ++ return pr_out("%s %d ", key_str, val); ++ case RDMA_NLDEV_PRINT_TYPE_HEX: ++ return pr_out("%s 0x%x ", key_str, val); ++ default: ++ return -EINVAL; ++ } ++} ++ ++static int print_driver_u32(struct rd *rd, const char *key_str, uint32_t val, ++ enum rdma_nldev_print_type print_type) ++{ ++ if (rd->json_output) { ++ jsonw_int_field(rd->jw, key_str, val); ++ return 0; ++ } ++ switch (print_type) { ++ case RDMA_NLDEV_PRINT_TYPE_UNSPEC: ++ return pr_out("%s %u ", key_str, val); ++ case RDMA_NLDEV_PRINT_TYPE_HEX: ++ return pr_out("%s 0x%x ", key_str, val); ++ default: ++ return -EINVAL; ++ } ++} ++ ++static int print_driver_s64(struct rd *rd, const char *key_str, int64_t val, ++ enum rdma_nldev_print_type print_type) ++{ ++ if (rd->json_output) { ++ jsonw_int_field(rd->jw, key_str, val); ++ return 0; ++ } ++ switch (print_type) { ++ case RDMA_NLDEV_PRINT_TYPE_UNSPEC: ++ return pr_out("%s %" PRId64 " ", key_str, val); ++ case RDMA_NLDEV_PRINT_TYPE_HEX: ++ return pr_out("%s 0x%" PRIx64 " ", key_str, val); ++ default: ++ return -EINVAL; ++ } ++} ++ ++static int 
print_driver_u64(struct rd *rd, const char *key_str, uint64_t val, ++ enum rdma_nldev_print_type print_type) ++{ ++ if (rd->json_output) { ++ jsonw_int_field(rd->jw, key_str, val); ++ return 0; ++ } ++ switch (print_type) { ++ case RDMA_NLDEV_PRINT_TYPE_UNSPEC: ++ return pr_out("%s %" PRIu64 " ", key_str, val); ++ case RDMA_NLDEV_PRINT_TYPE_HEX: ++ return pr_out("%s 0x%" PRIx64 " ", key_str, val); ++ default: ++ return -EINVAL; ++ } ++} ++ ++static int print_driver_entry(struct rd *rd, struct nlattr *key_attr, ++ struct nlattr *val_attr, ++ enum rdma_nldev_print_type print_type) ++{ ++ const char *key_str = mnl_attr_get_str(key_attr); ++ int attr_type = nla_type(val_attr); ++ ++ switch (attr_type) { ++ case RDMA_NLDEV_ATTR_DRIVER_STRING: ++ return print_driver_string(rd, key_str, ++ mnl_attr_get_str(val_attr)); ++ case RDMA_NLDEV_ATTR_DRIVER_S32: ++ return print_driver_s32(rd, key_str, ++ mnl_attr_get_u32(val_attr), print_type); ++ case RDMA_NLDEV_ATTR_DRIVER_U32: ++ return print_driver_u32(rd, key_str, ++ mnl_attr_get_u32(val_attr), print_type); ++ case RDMA_NLDEV_ATTR_DRIVER_S64: ++ return print_driver_s64(rd, key_str, ++ mnl_attr_get_u64(val_attr), print_type); ++ case RDMA_NLDEV_ATTR_DRIVER_U64: ++ return print_driver_u64(rd, key_str, ++ mnl_attr_get_u64(val_attr), print_type); ++ } ++ return -EINVAL; ++} ++ ++void print_driver_table(struct rd *rd, struct nlattr *tb) ++{ ++ int print_type = RDMA_NLDEV_PRINT_TYPE_UNSPEC; ++ struct nlattr *tb_entry, *key = NULL, *val; ++ int type, cc = 0; ++ int ret; ++ ++ if (!rd->show_driver_details || !tb) ++ return; ++ ++ if (rd->pretty_output) ++ newline_indent(rd); ++ ++ /* ++ * Driver attrs are tuples of {key, [print-type], value}. ++ * The key must be a string. If print-type is present, it ++ * defines an alternate printf format type vs the native format ++ * for the attribute. And the value can be any available ++ * driver type. 
++ */ ++ mnl_attr_for_each_nested(tb_entry, tb) { ++ ++ if (cc > MAX_LINE_LENGTH) { ++ if (rd->pretty_output) ++ newline_indent(rd); ++ cc = 0; ++ } ++ if (rd_attr_check(tb_entry, &type) != MNL_CB_OK) ++ return; ++ if (!key) { ++ if (type != MNL_TYPE_NUL_STRING) ++ return; ++ key = tb_entry; ++ } else if (type == MNL_TYPE_U8) { ++ print_type = mnl_attr_get_u8(tb_entry); ++ } else { ++ val = tb_entry; ++ ret = print_driver_entry(rd, key, val, print_type); ++ if (ret < 0) ++ return; ++ cc += ret; ++ print_type = RDMA_NLDEV_PRINT_TYPE_UNSPEC; ++ key = NULL; ++ } ++ } ++ return; ++} +-- +2.20.1 + diff --git a/SOURCES/0068-rdma-add-man-pages-for-RDMA-tool.patch b/SOURCES/0068-rdma-add-man-pages-for-RDMA-tool.patch new file mode 100644 index 0000000..ec21dae --- /dev/null +++ b/SOURCES/0068-rdma-add-man-pages-for-RDMA-tool.patch @@ -0,0 +1,422 @@ +From 4bee4617fa17405a52e11ed47e21feb20a277cc2 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Thu, 28 Mar 2019 15:00:33 +0100 +Subject: [PATCH] rdma: add man pages for RDMA tool + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1642479 +Upstream Status: RHEL-only + +commit 379afb6274462dee196d5909f6988b1ce5466c0b +Author: Andrea Claudi +Date: Thu Mar 28 13:02:20 2019 +0100 + + rdma: add man pages for RDMA tool + + Checkout to the v5.0.0 upstream tag and update man8 Makefile + + Signed-off-by: Andrea Claudi +--- + man/man8/Makefile | 2 +- + man/man8/rdma-dev.8 | 69 +++++++++++++++++++++++ + man/man8/rdma-link.8 | 56 ++++++++++++++++++ + man/man8/rdma-resource.8 | 109 +++++++++++++++++++++++++++++++++++ + man/man8/rdma.8 | 119 +++++++++++++++++++++++++++++++++++++++ + 5 files changed, 354 insertions(+), 1 deletion(-) + create mode 100644 man/man8/rdma-dev.8 + create mode 100644 man/man8/rdma-link.8 + create mode 100644 man/man8/rdma-resource.8 + create mode 100644 man/man8/rdma.8 + +diff --git a/man/man8/Makefile b/man/man8/Makefile +index f33186446819e..416443f3f5361 100644 +--- a/man/man8/Makefile ++++ 
b/man/man8/Makefile +@@ -19,7 +19,7 @@ MAN8PAGES = $(TARGETS) ip.8 arpd.8 lnstat.8 routel.8 rtacct.8 rtmon.8 rtpr.8 ss. + tc-simple.8 tc-skbedit.8 tc-vlan.8 tc-xt.8 tc-skbmod.8 tc-ife.8 \ + tc-tunnel_key.8 tc-sample.8 \ + devlink.8 devlink-dev.8 devlink-monitor.8 devlink-port.8 devlink-sb.8 \ +- ifstat.8 ++ ifstat.8 rdma.8 rdma-dev.8 rdma-link.8 rdma-resource.8 + + all: $(TARGETS) + +diff --git a/man/man8/rdma-dev.8 b/man/man8/rdma-dev.8 +new file mode 100644 +index 0000000000000..069f471791904 +--- /dev/null ++++ b/man/man8/rdma-dev.8 +@@ -0,0 +1,69 @@ ++.TH RDMA\-DEV 8 "06 Jul 2017" "iproute2" "Linux" ++.SH NAME ++rdma-dev \- RDMA device configuration ++.SH SYNOPSIS ++.sp ++.ad l ++.in +8 ++.ti -8 ++.B rdma ++.RI "[ " OPTIONS " ]" ++.B dev ++.RI " { " COMMAND " | " ++.BR help " }" ++.sp ++ ++.ti -8 ++.IR OPTIONS " := { " ++\fB\-V\fR[\fIersion\fR] | ++\fB\-d\fR[\fIetails\fR] } ++ ++.ti -8 ++.B rdma dev show ++.RI "[ " DEV " ]" ++ ++.ti -8 ++.B rdma dev set ++.RI "[ " DEV " ]" ++.BR name ++.BR NEWNAME ++ ++.ti -8 ++.B rdma dev help ++ ++.SH "DESCRIPTION" ++.SS rdma dev set - rename rdma device ++ ++.SS rdma dev show - display rdma device attributes ++ ++.PP ++.I "DEV" ++- specifies the RDMA device to show. ++If this argument is omitted all devices are listed. ++ ++.SH "EXAMPLES" ++.PP ++rdma dev ++.RS 4 ++Shows the state of all RDMA devices on the system. ++.RE ++.PP ++rdma dev show mlx5_3 ++.RS 4 ++Shows the state of specified RDMA device. ++.RE ++.PP ++rdma dev set mlx5_3 name rdma_0 ++.RS 4 ++Renames the mlx5_3 device to rdma_0. 
++.RE ++.PP ++ ++.SH SEE ALSO ++.BR rdma (8), ++.BR rdma-link (8), ++.BR rdma-resource (8), ++.br ++ ++.SH AUTHOR ++Leon Romanovsky +diff --git a/man/man8/rdma-link.8 b/man/man8/rdma-link.8 +new file mode 100644 +index 0000000000000..bddf34746e8b2 +--- /dev/null ++++ b/man/man8/rdma-link.8 +@@ -0,0 +1,56 @@ ++.TH RDMA\-LINK 8 "06 Jul 2017" "iproute2" "Linux" ++.SH NAME ++rdma-link \- rdma link configuration ++.SH SYNOPSIS ++.sp ++.ad l ++.in +8 ++.ti -8 ++.B rdma ++.RI "[ " OPTIONS " ]" ++.B link ++.RI " { " COMMAND " | " ++.BR help " }" ++.sp ++ ++.ti -8 ++.IR OPTIONS " := { " ++\fB\-V\fR[\fIersion\fR] | ++\fB\-d\fR[\fIetails\fR] } ++ ++.ti -8 ++.B rdma link show ++.RI "[ " DEV/PORT_INDEX " ]" ++ ++.ti -8 ++.B rdma link help ++ ++.SH "DESCRIPTION" ++.SS rdma link show - display rdma link attributes ++ ++.PP ++.I "DEV/PORT_INDEX" ++- specifies the RDMA link to show. ++If this argument is omitted all links are listed. ++ ++.SH "EXAMPLES" ++.PP ++rdma link show ++.RS 4 ++Shows the state of all rdma links on the system. ++.RE ++.PP ++rdma link show mlx5_2/1 ++.RS 4 ++Shows the state of specified rdma link. 
++.RE ++.PP ++ ++.SH SEE ALSO ++.BR rdma (8), ++.BR rdma-dev (8), ++.BR rdma-resource (8), ++.br ++ ++.SH AUTHOR ++Leon Romanovsky +diff --git a/man/man8/rdma-resource.8 b/man/man8/rdma-resource.8 +new file mode 100644 +index 0000000000000..40b073dbfcf24 +--- /dev/null ++++ b/man/man8/rdma-resource.8 +@@ -0,0 +1,109 @@ ++.TH RDMA\-RESOURCE 8 "26 Dec 2017" "iproute2" "Linux" ++.SH NAME ++rdma-resource \- rdma resource configuration ++.SH SYNOPSIS ++.sp ++.ad l ++.in +8 ++.ti -8 ++.B rdma ++.RI "[ " OPTIONS " ] " RESOURCE " { " COMMAND " | " ++.BR help " }" ++.sp ++ ++.ti -8 ++.IR RESOURCE " := { " ++.BR cm_id " | " cq " | " mr " | " pd " | " qp " }" ++.sp ++ ++.ti -8 ++.IR OPTIONS " := { " ++\fB\-j\fR[\fIson\fR] | ++\fB\-d\fR[\fIetails\fR] } ++ ++.ti -8 ++.B rdma resource show ++.RI "[ " DEV/PORT_INDEX " ]" ++ ++.ti -8 ++.B rdma resource help ++ ++.SH "DESCRIPTION" ++.SS rdma resource show - display rdma resource tracking information ++ ++.PP ++.I "DEV/PORT_INDEX" ++- specifies the RDMA link to show. ++If this argument is omitted all links are listed. ++ ++.SH "EXAMPLES" ++.PP ++rdma resource show ++.RS 4 ++Shows summary for all devices on the system. ++.RE ++.PP ++rdma resource show mlx5_2 ++.RS 4 ++Shows the state of specified rdma device. ++.RE ++.PP ++rdma res show qp link mlx5_4 ++.RS 4 ++Get all QPs for the specific device. ++.RE ++.PP ++rdma res show qp link mlx5_4/1 ++.RS 4 ++Get QPs of specific port. ++.RE ++.PP ++rdma res show qp link mlx5_4/0 ++.RS 4 ++Provide illegal port number (0 is illegal). ++.RE ++.PP ++rdma res show qp link mlx5_4/- ++.RS 4 ++Get QPs which have not assigned port yet. ++.RE ++.PP ++rdma res show qp link mlx5_4/- -d ++.RS 4 ++Detailed view. ++.RE ++.PP ++rdma res show qp link mlx5_4/- -dd ++.RS 4 ++Detailed view including driver-specific details. ++.RE ++.PP ++rdma res show qp link mlx5_4/1 lqpn 0-6 ++.RS 4 ++Limit to specific Local QPNs. 
++.RE ++.PP ++rdma resource show cm_id dst-port 7174 ++.RS 4 ++Show CM_IDs with destination ip port of 7174. ++.RE ++.PP ++rdma resource show cm_id src-addr 172.16.0.100 ++.RS 4 ++Show CM_IDs bound to local ip address 172.16.0.100. ++.RE ++.PP ++rdma resource show cq pid 30489 ++.RS 4 ++Show CQs belonging to pid 30489. ++.RE ++.PP ++ ++.SH SEE ALSO ++.BR rdma (8), ++.BR rdma-dev (8), ++.BR rdma-link (8), ++.br ++ ++.SH AUTHOR ++Leon Romanovsky +diff --git a/man/man8/rdma.8 b/man/man8/rdma.8 +new file mode 100644 +index 0000000000000..b2b5aef866ab0 +--- /dev/null ++++ b/man/man8/rdma.8 +@@ -0,0 +1,119 @@ ++.TH RDMA 8 "28 Mar 2017" "iproute2" "Linux" ++.SH NAME ++rdma \- RDMA tool ++.SH SYNOPSIS ++.sp ++.ad l ++.in +8 ++.ti -8 ++.B rdma ++.RI "[ " OPTIONS " ] " OBJECT " { " COMMAND " | " ++.BR help " }" ++.sp ++ ++.ti -8 ++.B rdma ++.RB "[ " -force " ] " ++.BI "-batch " filename ++.sp ++ ++.ti -8 ++.IR OBJECT " := { " ++.BR dev " | " link " }" ++.sp ++ ++.ti -8 ++.IR OPTIONS " := { " ++\fB\-V\fR[\fIersion\fR] | ++\fB\-d\fR[\fIetails\fR] | ++\fB\-j\fR[\fIson\fR] | ++\fB\-p\fR[\fIretty\fR] } ++ ++.SH OPTIONS ++ ++.TP ++.BR "\-V" , " -Version" ++Print the version of the ++.B rdma ++tool and exit. ++ ++.TP ++.BR "\-b", " \-batch " ++Read commands from provided file or standard input and invoke them. ++First failure will cause termination of rdma. ++ ++.TP ++.BR "\-force" ++Don't terminate rdma on errors in batch mode. ++If there were any errors during execution of the commands, the application return code will be non zero. ++ ++.TP ++.BR "\-d" , " --details" ++Output detailed information. Adding a second \-d includes driver-specific details. ++ ++.TP ++.BR "\-p" , " --pretty" ++When combined with -j generate a pretty JSON output. ++ ++.TP ++.BR "\-j" , " --json" ++Generate JSON output. ++ ++.SS ++.I OBJECT ++ ++.TP ++.B dev ++- RDMA device. ++ ++.TP ++.B link ++- RDMA port related. 
++ ++.PP ++The names of all objects may be written in full or ++abbreviated form, for example ++.B stats ++can be abbreviated as ++.B stat ++or just ++.B s. ++ ++.SS ++.I COMMAND ++ ++Specifies the action to perform on the object. ++The set of possible actions depends on the object type. ++As a rule, it is possible to ++.B show ++(or ++.B list ++) objects, but some objects do not allow all of these operations ++or have some additional commands. The ++.B help ++command is available for all objects. It prints ++out a list of available commands and argument syntax conventions. ++.sp ++If no command is given, some default command is assumed. ++Usually it is ++.B list ++or, if the objects of this class cannot be listed, ++.BR "help" . ++ ++.SH EXIT STATUS ++Exit status is 0 if command was successful or a positive integer upon failure. ++ ++.SH SEE ALSO ++.BR rdma-dev (8), ++.BR rdma-link (8), ++.BR rdma-resource (8), ++.br ++ ++.SH REPORTING BUGS ++Report any bugs to the Linux RDMA mailing list ++.B <linux-rdma@vger.kernel.org> ++where the development and maintenance is primarily done. ++You do not have to be subscribed to the list to send a message there. 
++ ++.SH AUTHOR ++Leon Romanovsky +-- +2.20.1 + diff --git a/SOURCES/0069-tc-f_flower-Add-support-for-matching-first-frag-pack.patch b/SOURCES/0069-tc-f_flower-Add-support-for-matching-first-frag-pack.patch new file mode 100644 index 0000000..5c83a04 --- /dev/null +++ b/SOURCES/0069-tc-f_flower-Add-support-for-matching-first-frag-pack.patch @@ -0,0 +1,72 @@ +From 69685a7aa7fb408cce256e469430e10e99a43e2d Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 25 Mar 2019 16:54:31 +0100 +Subject: [PATCH] tc: f_flower: Add support for matching first frag packets + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1559814 +Upstream Status: iproute2.git commit fb4e6abfca2c4 + +commit fb4e6abfca2c48380210d48c1e7f3685f8bb58fd +Author: Pieter Jansen van Vuuren +Date: Fri Mar 9 11:07:22 2018 +0100 + + tc: f_flower: Add support for matching first frag packets + + Add matching support for distinguishing between first and later fragmented + packets. + + # tc filter add dev eth0 protocol ip parent ffff: \ + flower indev eth0 \ + ip_flags firstfrag \ + ip_proto udp \ + action mirred egress redirect dev eth1 + + # tc filter add dev eth0 protocol ip parent ffff: \ + flower indev eth0 \ + ip_flags nofirstfrag \ + ip_proto udp \ + action mirred egress redirect dev eth1 + + Signed-off-by: Pieter Jansen van Vuuren + Reviewed-by: Jakub Kicinski + Signed-off-by: Simon Horman + Signed-off-by: David Ahern +--- + man/man8/tc-flower.8 | 8 ++++++-- + tc/f_flower.c | 1 + + 2 files changed, 7 insertions(+), 2 deletions(-) + +diff --git a/man/man8/tc-flower.8 b/man/man8/tc-flower.8 +index 387f73f5cd2e9..661f42200bdfb 100644 +--- a/man/man8/tc-flower.8 ++++ b/man/man8/tc-flower.8 +@@ -255,8 +255,12 @@ is an 8 bit time-to-live value. + .BI ip_flags " IP_FLAGS" + .I IP_FLAGS + may be either +-.BR frag " or " nofrag +-to match on fragmented packets or not respectively. 
++.BR frag ", " nofrag ", " firstfrag " or " nofirstfrag ++where frag and nofrag could be used to match on fragmented packets or not, ++respectively. firstfrag and nofirstfrag can be used to further distinguish ++fragmented packets. firstfrag can be used to indicate the first fragmented ++packet. nofirstfrag can be used to indicate subsequent fragmented packets ++or non-fragmented packets. + .SH NOTES + As stated above where applicable, matches of a certain layer implicitly depend + on the matches of the next lower layer. Precisely, layer one and two matches +diff --git a/tc/f_flower.c b/tc/f_flower.c +index 40dcfbd687a20..e2c7daa0b8e03 100644 +--- a/tc/f_flower.c ++++ b/tc/f_flower.c +@@ -162,6 +162,7 @@ struct flag_to_string { + + static struct flag_to_string flags_str[] = { + { TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOWER_IP_FLAGS, "frag" }, ++ { TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST, FLOWER_IP_FLAGS, "firstfrag" }, + }; + + static int flower_parse_matching_flags(char *str, +-- +2.20.1 + diff --git a/SOURCES/0070-ss-enclose-IPv6-address-in-brackets.patch b/SOURCES/0070-ss-enclose-IPv6-address-in-brackets.patch new file mode 100644 index 0000000..6c82bd1 --- /dev/null +++ b/SOURCES/0070-ss-enclose-IPv6-address-in-brackets.patch @@ -0,0 +1,62 @@ +From 765baea7751f7140571dfb0285b1fca974b3450b Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 18:03:01 +0200 +Subject: [PATCH] ss: enclose IPv6 address in brackets + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1588122 +Upstream Status: iproute2.git commit aba9c23a6e1cb + +commit aba9c23a6e1cb134840c998df14888dca469a485 +Author: Stephen Hemminger +Date: Fri Aug 4 12:02:41 2017 -0700 + + ss: enclose IPv6 address in brackets + + Based on patch by Lehner Florian + + Adds support for RFC2732 IPv6 address format with brackets. 
+ + Signed-off-by: Stephen Hemminger +--- + misc/ss.c | 25 +++++++++++++++++++------ + 1 file changed, 19 insertions(+), 6 deletions(-) + +diff --git a/misc/ss.c b/misc/ss.c +index c0cb33e96d9ec..86defc71fabc4 100644 +--- a/misc/ss.c ++++ b/misc/ss.c +@@ -1093,12 +1093,25 @@ static void inet_addr_print(const inet_prefix *a, int port, unsigned int ifindex + ap = format_host(AF_INET, 4, a->data); + } + } else { +- ap = format_host(a->family, 16, a->data); +- est_len = strlen(ap); +- if (est_len <= addr_width) +- est_len = addr_width; +- else +- est_len = addr_width + ((est_len-addr_width+3)/4)*4; ++ if (!memcmp(a->data, &in6addr_any, sizeof(in6addr_any))) { ++ buf[0] = '*'; ++ buf[1] = 0; ++ } else { ++ ap = format_host(a->family, 16, a->data); ++ ++ /* Numeric IPv6 addresses should be bracketed */ ++ if (strchr(ap, ':')) { ++ snprintf(buf, sizeof(buf), ++ "[%s]", ap); ++ ap = buf; ++ } ++ ++ est_len = strlen(ap); ++ if (est_len <= addr_width) ++ est_len = addr_width; ++ else ++ est_len = addr_width + ((est_len-addr_width+3)/4)*4; ++ } + } + + if (ifindex) { +-- +2.20.1 + diff --git a/SOURCES/0071-ip-address-Use-correct-max-attribute-value-in-print_.patch b/SOURCES/0071-ip-address-Use-correct-max-attribute-value-in-print_.patch new file mode 100644 index 0000000..111178e --- /dev/null +++ b/SOURCES/0071-ip-address-Use-correct-max-attribute-value-in-print_.patch @@ -0,0 +1,40 @@ +From 861fe3293afa0907f9883df005e7a09a5f4b710b Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 18:14:04 +0200 +Subject: [PATCH] ip-address: Use correct max attribute value in + print_vf_stats64() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1679749 +Upstream Status: iproute2.git commit d7cf2416fc3a0 + +commit d7cf2416fc3a08b411beffb93a9e118f6593892d +Author: Phil Sutter +Date: Thu Feb 21 19:37:51 2019 +0100 + + ip-address: Use correct max attribute value in print_vf_stats64() + + IFLA_VF_MAX is larger than the highest valid index in vf array. 
+ + Fixes: a1b99717c7cd7 ("Add displaying VF traffic statistics") + Signed-off-by: Phil Sutter + Signed-off-by: Stephen Hemminger +--- + ip/ipaddress.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/ip/ipaddress.c b/ip/ipaddress.c +index 44111a27501a9..bed2d3801809b 100644 +--- a/ip/ipaddress.c ++++ b/ip/ipaddress.c +@@ -467,7 +467,7 @@ static void print_vf_stats64(FILE *fp, struct rtattr *vfstats) + return; + } + +- parse_rtattr_nested(vf, IFLA_VF_MAX, vfstats); ++ parse_rtattr_nested(vf, IFLA_VF_STATS_MAX, vfstats); + + /* RX stats */ + fprintf(fp, "%s", _SL_); +-- +2.20.1 + diff --git a/SOURCES/0072-examples-Some-shell-fixes-to-cbq.init.patch b/SOURCES/0072-examples-Some-shell-fixes-to-cbq.init.patch new file mode 100644 index 0000000..116adf8 --- /dev/null +++ b/SOURCES/0072-examples-Some-shell-fixes-to-cbq.init.patch @@ -0,0 +1,131 @@ +From c1aa1bc599f0ced53b5e9d21d01a03d78ae2b37f Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:05:37 +0200 +Subject: [PATCH] examples: Some shell fixes to cbq.init + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 2313b6bfe4f5e + +commit 2313b6bfe4f5e6b60fcdfaaeaa1eabcfd3f550f4 +Author: Phil Sutter +Date: Thu Aug 17 19:09:31 2017 +0200 + + examples: Some shell fixes to cbq.init + + This addresses the following issues: + + - $@ is an array, so don't use it in quoted strings - use $* instead. + + - Add missing quotes to components of [ ] expressions. These are not + strictly necessary since the output of 'wc -l' should be a single word + only, but in case of errors, bash prints "integer expression expected" + instead of "too many arguments". + + - Use -print0/-0 when piping from find to xargs to allow for filenames + which contain whitespace. + + - Quote arguments to 'eval' to prevent word-splitting. 
+ + Signed-off-by: Phil Sutter +--- + examples/cbq.init-v0.7.3 | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/examples/cbq.init-v0.7.3 b/examples/cbq.init-v0.7.3 +index 1bc0d446f8983..66448d88f0053 100644 +--- a/examples/cbq.init-v0.7.3 ++++ b/examples/cbq.init-v0.7.3 +@@ -532,7 +532,7 @@ cbq_off () { + + ### Prefixed message + cbq_message () { +- echo -e "**CBQ: $@" ++ echo -e "**CBQ: $*" + } # cbq_message + + ### Failure message +@@ -560,15 +560,15 @@ cbq_time2abs () { + ### Display CBQ setup + cbq_show () { + for dev in `cbq_device_list`; do +- [ `tc qdisc show dev $dev| wc -l` -eq 0 ] && continue ++ [ "`tc qdisc show dev $dev| wc -l`" -eq 0 ] && continue + echo -e "### $dev: queueing disciplines\n" + tc $1 qdisc show dev $dev; echo + +- [ `tc class show dev $dev| wc -l` -eq 0 ] && continue ++ [ "`tc class show dev $dev| wc -l`" -eq 0 ] && continue + echo -e "### $dev: traffic classes\n" + tc $1 class show dev $dev; echo + +- [ `tc filter show dev $dev| wc -l` -eq 0 ] && continue ++ [ "`tc filter show dev $dev| wc -l`" -eq 0 ] && continue + echo -e "### $dev: filtering rules\n" + tc $1 filter show dev $dev; echo + done +@@ -585,7 +585,7 @@ cbq_init () { + + ### Gather all DEVICE fields from $1/cbq-* + DEVFIELDS=`find $1 -maxdepth 1 \( -type f -or -type l \) -name 'cbq-*' \ +- -not -name '*~' | xargs sed -n 's/#.*//; \ ++ -not -name '*~' -print0 | xargs -0 sed -n 's/#.*//; \ + s/[[:space:]]//g; /^DEVICE=[^,]*,[^,]*\(,[^,]*\)\?/ \ + { s/.*=//; p; }'| sort -u` + [ -z "$DEVFIELDS" ] && +@@ -593,7 +593,7 @@ cbq_init () { + + ### Check for different DEVICE fields for the same device + DEVICES=`echo "$DEVFIELDS"| sed 's/,.*//'| sort -u` +- [ `echo "$DEVICES"| wc -l` -ne `echo "$DEVFIELDS"| wc -l` ] && ++ [ "`echo "$DEVICES"| wc -l`" -ne "`echo "$DEVFIELDS"| wc -l`" ] && + cbq_failure "different DEVICE fields for single device!\n$DEVFIELDS" + } # cbq_init + +@@ -618,7 +618,7 @@ cbq_load_class () { + 
PRIO_MARK=$PRIO_MARK_DEFAULT + PRIO_REALM=$PRIO_REALM_DEFAULT + +- eval `echo "$CFILE"| grep -E "^($CBQ_WORDS)="` ++ eval "`echo "$CFILE"| grep -E "^($CBQ_WORDS)="`" + + ### Require RATE/WEIGHT + [ -z "$RATE" -o -z "$WEIGHT" ] && +@@ -661,7 +661,7 @@ if [ "$1" = "compile" ]; then + + ### echo-only version of "tc" command + tc () { +- echo "$TC $@" ++ echo "$TC $*" + } # tc + + elif [ -n "$CBQ_DEBUG" ]; then +@@ -669,13 +669,13 @@ elif [ -n "$CBQ_DEBUG" ]; then + + ### Logging version of "ip" command + ip () { +- echo -e "\n# ip $@" >> $CBQ_DEBUG ++ echo -e "\n# ip $*" >> $CBQ_DEBUG + $IP "$@" 2>&1 | tee -a $CBQ_DEBUG + } # ip + + ### Logging version of "tc" command + tc () { +- echo -e "\n# tc $@" >> $CBQ_DEBUG ++ echo -e "\n# tc $*" >> $CBQ_DEBUG + $TC "$@" 2>&1 | tee -a $CBQ_DEBUG + } # tc + else +@@ -711,8 +711,8 @@ if [ "$1" != "compile" -a "$2" != "nocache" -a -z "$CBQ_DEBUG" ]; then + ### validate the cache + [ "$2" = "invalidate" -o ! -f $CBQ_CACHE ] && VALID=0 + if [ $VALID -eq 1 ]; then +- [ `find $CBQ_PATH -maxdepth 1 -newer $CBQ_CACHE| \ +- wc -l` -gt 0 ] && VALID=0 ++ [ "`find $CBQ_PATH -maxdepth 1 -newer $CBQ_CACHE| \ ++ wc -l`" -gt 0 ] && VALID=0 + fi + + ### compile the config if the cache is invalid +-- +2.20.1 + diff --git a/SOURCES/0073-ifcfg-Quote-left-hand-side-of-expression.patch b/SOURCES/0073-ifcfg-Quote-left-hand-side-of-expression.patch new file mode 100644 index 0000000..14be257 --- /dev/null +++ b/SOURCES/0073-ifcfg-Quote-left-hand-side-of-expression.patch @@ -0,0 +1,38 @@ +From 955ce06f1f4be5a8733e5829e3c8cadf9fc68c40 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:05:37 +0200 +Subject: [PATCH] ifcfg: Quote left-hand side of [ ] expression + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 1e3197e0fdbf2 + +commit 1e3197e0fdbf299fe24cdba7c0d613317ed82063 +Author: Phil Sutter +Date: Thu Aug 17 19:09:32 2017 +0200 + + ifcfg: Quote left-hand side of [ ] expression + 
+ This prevents word-splitting and therefore leads to more accurate error + message in case 'grep -c' prints something other than a number. + + Signed-off-by: Phil Sutter +--- + ip/ifcfg | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/ip/ifcfg b/ip/ifcfg +index 083d9df36742f..30a2dc49816cd 100644 +--- a/ip/ifcfg ++++ b/ip/ifcfg +@@ -131,7 +131,7 @@ noarp=$? + + ip route add unreachable 224.0.0.0/24 >& /dev/null + ip route add unreachable 255.255.255.255 >& /dev/null +-if [ `ip link ls $dev | grep -c MULTICAST` -ge 1 ]; then ++if [ "`ip link ls $dev | grep -c MULTICAST`" -ge 1 ]; then + ip route add 224.0.0.0/4 dev $dev scope global >& /dev/null + fi + +-- +2.20.1 + diff --git a/SOURCES/0074-tipc-node-Fix-socket-fd-check-in-cmd_node_get_addr.patch b/SOURCES/0074-tipc-node-Fix-socket-fd-check-in-cmd_node_get_addr.patch new file mode 100644 index 0000000..e7192ee --- /dev/null +++ b/SOURCES/0074-tipc-node-Fix-socket-fd-check-in-cmd_node_get_addr.patch @@ -0,0 +1,38 @@ +From 8743a7a8978270195693441f370cea552f100cae Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:05:38 +0200 +Subject: [PATCH] tipc/node: Fix socket fd check in cmd_node_get_addr() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 436270a45dea2 + +commit 436270a45dea2fe5dbc4680f9c8e31f07d167f20 +Author: Phil Sutter +Date: Thu Aug 17 19:09:32 2017 +0200 + + tipc/node: Fix socket fd check in cmd_node_get_addr() + + socket() returns -1 on error, not 0. 
+ + Signed-off-by: Phil Sutter +--- + tipc/node.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tipc/node.c b/tipc/node.c +index 201fe1a4df3bd..fe085aec9b4ac 100644 +--- a/tipc/node.c ++++ b/tipc/node.c +@@ -109,7 +109,8 @@ static int cmd_node_get_addr(struct nlmsghdr *nlh, const struct cmd *cmd, + socklen_t sz = sizeof(struct sockaddr_tipc); + struct sockaddr_tipc addr; + +- if (!(sk = socket(AF_TIPC, SOCK_RDM, 0))) { ++ sk = socket(AF_TIPC, SOCK_RDM, 0); ++ if (sk < 0) { + fprintf(stderr, "opening TIPC socket: %s\n", strerror(errno)); + return -1; + } +-- +2.20.1 + diff --git a/SOURCES/0075-iproute_lwtunnel-Argument-to-strerror-must-be-positi.patch b/SOURCES/0075-iproute_lwtunnel-Argument-to-strerror-must-be-positi.patch new file mode 100644 index 0000000..97c715e --- /dev/null +++ b/SOURCES/0075-iproute_lwtunnel-Argument-to-strerror-must-be-positi.patch @@ -0,0 +1,35 @@ +From 4e55e568493084c458ef96f10a2a3dab93e8464a Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:05:38 +0200 +Subject: [PATCH] iproute_lwtunnel: Argument to strerror must be positive + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 58a15e6c7e7cb + +commit 58a15e6c7e7cb4c0d25e6bb3762ac2b1c94ff523 +Author: Phil Sutter +Date: Thu Aug 17 19:09:31 2017 +0200 + + iproute_lwtunnel: Argument to strerror must be positive + + Signed-off-by: Phil Sutter +--- + ip/iproute_lwtunnel.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/ip/iproute_lwtunnel.c b/ip/iproute_lwtunnel.c +index b6f08f073ef02..92ea2c87787ec 100644 +--- a/ip/iproute_lwtunnel.c ++++ b/ip/iproute_lwtunnel.c +@@ -480,7 +480,7 @@ static int lwt_parse_bpf(struct rtattr *rta, size_t len, + err = bpf_parse_common(bpf_type, &cfg, &bpf_cb_ops, &x); + if (err < 0) { + fprintf(stderr, "Failed to parse eBPF program: %s\n", +- strerror(err)); ++ strerror(-err)); + return -1; + } + rta_nest_end(rta, nest); +-- +2.20.1 + diff 
--git a/SOURCES/0076-iproute_lwtunnel-csum_mode-value-checking-was-ineffe.patch b/SOURCES/0076-iproute_lwtunnel-csum_mode-value-checking-was-ineffe.patch new file mode 100644 index 0000000..df39404 --- /dev/null +++ b/SOURCES/0076-iproute_lwtunnel-csum_mode-value-checking-was-ineffe.patch @@ -0,0 +1,61 @@ +From db11067fb37cc3a77cc70fb9233a454102c4854c Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:05:38 +0200 +Subject: [PATCH] iproute_lwtunnel: csum_mode value checking was ineffective + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 08806fb0191e9 +Conflicts: adjust rta_addattr8() call to handle return value + +commit 08806fb0191e9ee8769507dc93b722fd021feb34 +Author: Phil Sutter +Date: Thu Aug 17 19:09:30 2017 +0200 + + iproute_lwtunnel: csum_mode value checking was ineffective + + ila_csum_name2mode() returning -1 on error but being declared as + returning __u8 doesn't make much sense. Change the code to correctly + detect this issue. Checking for __u8 overruns shouldn't be necessary + though since ila_csum_name2mode() return values are well-defined. 
+ + Signed-off-by: Phil Sutter +--- + ip/iproute_lwtunnel.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/ip/iproute_lwtunnel.c b/ip/iproute_lwtunnel.c +index 92ea2c87787ec..5da3a1b488cbd 100644 +--- a/ip/iproute_lwtunnel.c ++++ b/ip/iproute_lwtunnel.c +@@ -125,7 +125,7 @@ static char *ila_csum_mode2name(__u8 csum_mode) + } + } + +-static __u8 ila_csum_name2mode(char *name) ++static int ila_csum_name2mode(char *name) + { + if (strcmp(name, "adj-transport") == 0) + return ILA_CSUM_ADJUST_TRANSPORT; +@@ -348,7 +348,7 @@ static int parse_encap_ila(struct rtattr *rta, size_t len, + + while (argc > 0) { + if (strcmp(*argv, "csum-mode") == 0) { +- __u8 csum_mode; ++ int csum_mode; + + NEXT_ARG(); + +@@ -357,8 +357,8 @@ static int parse_encap_ila(struct rtattr *rta, size_t len, + invarg("\"csum-mode\" value is invalid\n", + *argv); + +- ret = rta_addattr8(rta, len, ILA_ATTR_CSUM_MODE, +- (__u8)csum_mode); ++ ret = rta_addattr8(rta, 1024, ILA_ATTR_CSUM_MODE, ++ (__u8)csum_mode); + + argc--; argv++; + } else { +-- +2.20.1 + diff --git a/SOURCES/0077-ss-Don-t-leak-fd-in-tcp_show_netlink_file.patch b/SOURCES/0077-ss-Don-t-leak-fd-in-tcp_show_netlink_file.patch new file mode 100644 index 0000000..3ae8c4b --- /dev/null +++ b/SOURCES/0077-ss-Don-t-leak-fd-in-tcp_show_netlink_file.patch @@ -0,0 +1,107 @@ +From fa8b9f8fa8a6762bb0151e65a11eca9dca7aca83 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:07:22 +0200 +Subject: [PATCH] ss: Don't leak fd in tcp_show_netlink_file() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 4b45ae221e949 + +commit 4b45ae221e949b604d968a10d5d996c7c7cec1a6 +Author: Phil Sutter +Date: Thu Aug 17 19:09:30 2017 +0200 + + ss: Don't leak fd in tcp_show_netlink_file() + + Signed-off-by: Phil Sutter +--- + misc/ss.c | 32 ++++++++++++++++++++------------ + 1 file changed, 20 insertions(+), 12 deletions(-) + +diff --git a/misc/ss.c b/misc/ss.c +index 
86defc71fabc4..eb46e0c4b95fb 100644 +--- a/misc/ss.c ++++ b/misc/ss.c +@@ -2764,41 +2764,44 @@ static int tcp_show_netlink_file(struct filter *f) + { + FILE *fp; + char buf[16384]; ++ int err = -1; + + if ((fp = fopen(getenv("TCPDIAG_FILE"), "r")) == NULL) { + perror("fopen($TCPDIAG_FILE)"); +- return -1; ++ return err; + } + + while (1) { +- int status, err; ++ int status, err2; + struct nlmsghdr *h = (struct nlmsghdr *)buf; + struct sockstat s = {}; + + status = fread(buf, 1, sizeof(*h), fp); + if (status < 0) { + perror("Reading header from $TCPDIAG_FILE"); +- return -1; ++ break; + } + if (status != sizeof(*h)) { + perror("Unexpected EOF reading $TCPDIAG_FILE"); +- return -1; ++ break; + } + + status = fread(h+1, 1, NLMSG_ALIGN(h->nlmsg_len-sizeof(*h)), fp); + + if (status < 0) { + perror("Reading $TCPDIAG_FILE"); +- return -1; ++ break; + } + if (status + sizeof(*h) < h->nlmsg_len) { + perror("Unexpected EOF reading $TCPDIAG_FILE"); +- return -1; ++ break; + } + + /* The only legal exit point */ +- if (h->nlmsg_type == NLMSG_DONE) +- return 0; ++ if (h->nlmsg_type == NLMSG_DONE) { ++ err = 0; ++ break; ++ } + + if (h->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h); +@@ -2809,7 +2812,7 @@ static int tcp_show_netlink_file(struct filter *f) + errno = -err->error; + perror("TCPDIAG answered"); + } +- return -1; ++ break; + } + + parse_diag_msg(h, &s); +@@ -2818,10 +2821,15 @@ static int tcp_show_netlink_file(struct filter *f) + if (f && f->f && run_ssfilter(f->f, &s) == 0) + continue; + +- err = inet_show_sock(h, &s); +- if (err < 0) +- return err; ++ err2 = inet_show_sock(h, &s); ++ if (err2 < 0) { ++ err = err2; ++ break; ++ } + } ++ ++ fclose(fp); ++ return err; + } + + static int tcp_show(struct filter *f) +-- +2.20.1 + diff --git a/SOURCES/0078-tc-em_ipset-Don-t-leak-sockfd-on-error-path.patch b/SOURCES/0078-tc-em_ipset-Don-t-leak-sockfd-on-error-path.patch new file mode 100644 index 0000000..30c146e --- /dev/null +++ 
b/SOURCES/0078-tc-em_ipset-Don-t-leak-sockfd-on-error-path.patch @@ -0,0 +1,34 @@ +From d28ee4b622ad9fa10a81d88bb6b5ded02c085acd Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:07:22 +0200 +Subject: [PATCH] tc/em_ipset: Don't leak sockfd on error path + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 3e587d9f43891 + +commit 3e587d9f438910df6c1751c45fd898cec1477ae6 +Author: Phil Sutter +Date: Thu Aug 17 19:09:31 2017 +0200 + + tc/em_ipset: Don't leak sockfd on error path + + Signed-off-by: Phil Sutter +--- + tc/em_ipset.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tc/em_ipset.c b/tc/em_ipset.c +index fab975f5ea563..b59756515d239 100644 +--- a/tc/em_ipset.c ++++ b/tc/em_ipset.c +@@ -84,6 +84,7 @@ static int get_version(unsigned int *version) + res = getsockopt(sockfd, SOL_IP, SO_IP_SET, &req_version, &size); + if (res != 0) { + perror("xt_set getsockopt"); ++ close(sockfd); + return -1; + } + +-- +2.20.1 + diff --git a/SOURCES/0079-ipvrf-Fix-error-path-of-vrf_switch.patch b/SOURCES/0079-ipvrf-Fix-error-path-of-vrf_switch.patch new file mode 100644 index 0000000..ebf3bbc --- /dev/null +++ b/SOURCES/0079-ipvrf-Fix-error-path-of-vrf_switch.patch @@ -0,0 +1,60 @@ +From 7ea6dbec34ae5166dd93fd4dbfcab35512e86e94 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:07:22 +0200 +Subject: [PATCH] ipvrf: Fix error path of vrf_switch() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 6ac5943bdd5ac + +commit 6ac5943bdd5ac5bb8c22b99f5a1d5907ebbcae2b +Author: Phil Sutter +Date: Thu Aug 17 19:09:27 2017 +0200 + + ipvrf: Fix error path of vrf_switch() + + Apart from trying to close(-1), this also leaked memory. 
+ + Signed-off-by: Phil Sutter +--- + ip/ipvrf.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/ip/ipvrf.c b/ip/ipvrf.c +index 0f611b44b78ab..ae3b48fa81996 100644 +--- a/ip/ipvrf.c ++++ b/ip/ipvrf.c +@@ -369,12 +369,12 @@ static int vrf_switch(const char *name) + + /* -1 on length to add '/' to the end */ + if (ipvrf_get_netns(netns, sizeof(netns) - 1) < 0) +- return -1; ++ goto out; + + if (vrf_path(vpath, sizeof(vpath)) < 0) { + fprintf(stderr, "Failed to get base cgroup path: %s\n", + strerror(errno)); +- return -1; ++ goto out; + } + + /* if path already ends in netns then don't add it again */ +@@ -425,13 +425,14 @@ static int vrf_switch(const char *name) + snprintf(pid, sizeof(pid), "%d", getpid()); + if (write(fd, pid, strlen(pid)) < 0) { + fprintf(stderr, "Failed to join cgroup\n"); +- goto out; ++ goto out2; + } + + rc = 0; ++out2: ++ close(fd); + out: + free(mnt); +- close(fd); + + return rc; + } +-- +2.20.1 + diff --git a/SOURCES/0080-ifstat-Fix-memleak-in-error-case.patch b/SOURCES/0080-ifstat-Fix-memleak-in-error-case.patch new file mode 100644 index 0000000..aac3573 --- /dev/null +++ b/SOURCES/0080-ifstat-Fix-memleak-in-error-case.patch @@ -0,0 +1,38 @@ +From 78ff1fa1a2ff22e6fb7dc0a689e5a4861826431e Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:07:22 +0200 +Subject: [PATCH] ifstat: Fix memleak in error case + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 35f6adefb8f9d + +commit 35f6adefb8f9d56437f5455ac8c0c3cc329e3317 +Author: Phil Sutter +Date: Thu Aug 17 19:09:28 2017 +0200 + + ifstat: Fix memleak in error case + + Signed-off-by: Phil Sutter +--- + misc/ifstat.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/misc/ifstat.c b/misc/ifstat.c +index a853ee6d7e3b3..8fa354265a9a1 100644 +--- a/misc/ifstat.c ++++ b/misc/ifstat.c +@@ -143,8 +143,10 @@ static int get_nlmsg_extended(const struct sockaddr_nl *who, + 
struct rtattr *attr; + + attr = parse_rtattr_one_nested(sub_type, tb[filter_type]); +- if (attr == NULL) ++ if (attr == NULL) { ++ free(n); + return 0; ++ } + memcpy(&n->val, RTA_DATA(attr), sizeof(n->val)); + } + memset(&n->rate, 0, sizeof(n->rate)); +-- +2.20.1 + diff --git a/SOURCES/0081-ifstat-Fix-memleak-in-dump_kern_db-for-json-output.patch b/SOURCES/0081-ifstat-Fix-memleak-in-dump_kern_db-for-json-output.patch new file mode 100644 index 0000000..520bac0 --- /dev/null +++ b/SOURCES/0081-ifstat-Fix-memleak-in-dump_kern_db-for-json-output.patch @@ -0,0 +1,45 @@ +From 3f74fdcd943982101775db3b4240a6f953d1198d Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:07:22 +0200 +Subject: [PATCH] ifstat: Fix memleak in dump_kern_db() for json output + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit b530cef0e3bbd + +commit b530cef0e3bbd27510e19f5f720a7ec94f3fa723 +Author: Phil Sutter +Date: Thu Aug 17 19:09:29 2017 +0200 + + ifstat: Fix memleak in dump_kern_db() for json output + + Looks like this was forgotten when converting to common json output + formatter. 
+ + Fixes: fcc16c2287bf8 ("provide common json output formatter") + Signed-off-by: Phil Sutter +--- + misc/ifstat.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/misc/ifstat.c b/misc/ifstat.c +index 8fa354265a9a1..1be21703bf14c 100644 +--- a/misc/ifstat.c ++++ b/misc/ifstat.c +@@ -535,8 +535,12 @@ static void dump_kern_db(FILE *fp) + else + print_one_if(fp, n, n->val); + } +- if (json_output) +- fprintf(fp, "\n} }\n"); ++ if (jw) { ++ jsonw_end_object(jw); ++ ++ jsonw_end_object(jw); ++ jsonw_destroy(&jw); ++ } + } + + static void dump_incr_db(FILE *fp) +-- +2.20.1 + diff --git a/SOURCES/0082-ss-Fix-potential-memleak-in-unix_stats_print.patch b/SOURCES/0082-ss-Fix-potential-memleak-in-unix_stats_print.patch new file mode 100644 index 0000000..57bec49 --- /dev/null +++ b/SOURCES/0082-ss-Fix-potential-memleak-in-unix_stats_print.patch @@ -0,0 +1,39 @@ +From 125c0e845acd690c9dce5702413294304a328fd1 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:07:22 +0200 +Subject: [PATCH] ss: Fix potential memleak in unix_stats_print() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 46131577cf1ba + +commit 46131577cf1ba37198c82e1ce89c9bbca2153ef4 +Author: Phil Sutter +Date: Thu Aug 17 19:09:30 2017 +0200 + + ss: Fix potential memleak in unix_stats_print() + + Fixes: 2d0e538f3e1cd ("ss: Drop list traversal from unix_stats_print()") + Signed-off-by: Phil Sutter +--- + misc/ss.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/misc/ss.c b/misc/ss.c +index eb46e0c4b95fb..c97f05a4c7033 100644 +--- a/misc/ss.c ++++ b/misc/ss.c +@@ -3258,8 +3258,10 @@ static int unix_show(struct filter *f) + + if (name[0]) { + u->name = strdup(name); +- if (!u->name) ++ if (!u->name) { ++ free(u); + break; ++ } + } + + if (u->rport) { +-- +2.20.1 + diff --git a/SOURCES/0083-tipc-bearer-Fix-resource-leak-in-error-path.patch 
b/SOURCES/0083-tipc-bearer-Fix-resource-leak-in-error-path.patch new file mode 100644 index 0000000..db1fd9d --- /dev/null +++ b/SOURCES/0083-tipc-bearer-Fix-resource-leak-in-error-path.patch @@ -0,0 +1,43 @@ +From b6bf156c4d4abab8176112e48a595c3e7bb7f825 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:07:22 +0200 +Subject: [PATCH] tipc/bearer: Fix resource leak in error path + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit be55416addf76 + +commit be55416addf76e76836af6a4dd94b19c4186e1b2 +Author: Phil Sutter +Date: Thu Aug 17 19:09:31 2017 +0200 + + tipc/bearer: Fix resource leak in error path + + Signed-off-by: Phil Sutter +--- + tipc/bearer.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/tipc/bearer.c b/tipc/bearer.c +index 810344f672af1..c3d4491f8f6ef 100644 +--- a/tipc/bearer.c ++++ b/tipc/bearer.c +@@ -163,6 +163,7 @@ static int nl_add_udp_enable_opts(struct nlmsghdr *nlh, struct opt *opts, + if (!remip) { + if (generate_multicast(loc->ai_family, buf, sizeof(buf))) { + fprintf(stderr, "Failed to generate multicast address\n"); ++ freeaddrinfo(loc); + return -EINVAL; + } + remip = buf; +@@ -177,6 +178,8 @@ static int nl_add_udp_enable_opts(struct nlmsghdr *nlh, struct opt *opts, + + if (rem->ai_family != loc->ai_family) { + fprintf(stderr, "UDP local and remote AF mismatch\n"); ++ freeaddrinfo(rem); ++ freeaddrinfo(loc); + return -EINVAL; + } + +-- +2.20.1 + diff --git a/SOURCES/0084-devlink-No-need-for-this-self-assignment.patch b/SOURCES/0084-devlink-No-need-for-this-self-assignment.patch new file mode 100644 index 0000000..7809ca6 --- /dev/null +++ b/SOURCES/0084-devlink-No-need-for-this-self-assignment.patch @@ -0,0 +1,39 @@ +From 1fe740ceabb0b965224678a69a02255e20d5a47a Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:07:22 +0200 +Subject: [PATCH] devlink: No need for this self-assignment + +Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 8579a398c5ab0 + +commit 8579a398c5ab0d26bce0ed9b4b6b6e5d62fcc89d +Author: Phil Sutter +Date: Thu Aug 17 19:09:25 2017 +0200 + + devlink: No need for this self-assignment + + dl_argv_handle_both() will either assign to handle_bit or error out in + which case the variable is not used by the caller. + + Signed-off-by: Phil Sutter + Acked-by: Jiri Pirko +--- + devlink/devlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/devlink/devlink.c b/devlink/devlink.c +index 2000db81aabb0..ae295b5632e8c 100644 +--- a/devlink/devlink.c ++++ b/devlink/devlink.c +@@ -845,7 +845,7 @@ static int dl_argv_parse(struct dl *dl, uint32_t o_required, + int err; + + if (o_required & DL_OPT_HANDLE && o_required & DL_OPT_HANDLEP) { +- uint32_t handle_bit = handle_bit; ++ uint32_t handle_bit; + + err = dl_argv_handle_both(dl, &opts->bus_name, &opts->dev_name, + &opts->port_index, &handle_bit); +-- +2.20.1 + diff --git a/SOURCES/0085-ipntable-No-need-to-check-and-assign-to-parms_rta.patch b/SOURCES/0085-ipntable-No-need-to-check-and-assign-to-parms_rta.patch new file mode 100644 index 0000000..5d26732 --- /dev/null +++ b/SOURCES/0085-ipntable-No-need-to-check-and-assign-to-parms_rta.patch @@ -0,0 +1,38 @@ +From d62d2ffc71194068af509ec3285ecd6823d883fb Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:07:22 +0200 +Subject: [PATCH] ipntable: No need to check and assign to parms_rta + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 2869262144271 + +commit 28692621442710f4a67fe33742f56efc582ee33a +Author: Phil Sutter +Date: Thu Aug 17 19:09:26 2017 +0200 + + ipntable: No need to check and assign to parms_rta + + This variable is initialized at declaration and nowhere else does any + assignment to it happen, so just drop the check. 
+ + Signed-off-by: Phil Sutter +--- + ip/ipntable.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/ip/ipntable.c b/ip/ipntable.c +index 65063321c85f8..ae8c74ead2cb8 100644 +--- a/ip/ipntable.c ++++ b/ip/ipntable.c +@@ -202,8 +202,6 @@ static int ipntable_modify(int cmd, int flags, int argc, char **argv) + if (get_u32(&queue, *argv, 0)) + invarg("\"queue\" value is invalid", *argv); + +- if (!parms_rta) +- parms_rta = (struct rtattr *)&parms_buf; + rta_addattr32(parms_rta, sizeof(parms_buf), + NDTPA_QUEUE_LEN, queue); + parms_change = 1; +-- +2.20.1 + diff --git a/SOURCES/0086-iproute-Fix-for-missing-Oifs-display.patch b/SOURCES/0086-iproute-Fix-for-missing-Oifs-display.patch new file mode 100644 index 0000000..32254db --- /dev/null +++ b/SOURCES/0086-iproute-Fix-for-missing-Oifs-display.patch @@ -0,0 +1,55 @@ +From 2cb971cefe001a66677c2d1d23b1596cbffb3989 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:07:22 +0200 +Subject: [PATCH] iproute: Fix for missing 'Oifs:' display + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 2a866256197f8 + +commit 2a866256197f8b86e61fa1afc99b11d7056d5686 +Author: Phil Sutter +Date: Thu Aug 17 19:09:27 2017 +0200 + + iproute: Fix for missing 'Oifs:' display + + Covscan complained about dead code but after reading it, I assume the + author's intention was to prefix the interface list with 'Oifs: '. + Initializing first to 1 and setting it to 0 after above prefix was + printed should fix it. 
+ + Signed-off-by: Phil Sutter +--- + ip/iproute.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/ip/iproute.c b/ip/iproute.c +index d4db035fc7b24..6ebc6214c45ee 100644 +--- a/ip/iproute.c ++++ b/ip/iproute.c +@@ -618,7 +618,7 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) + } + if (tb[RTA_MULTIPATH]) { + struct rtnexthop *nh = RTA_DATA(tb[RTA_MULTIPATH]); +- int first = 0; ++ int first = 1; + + len = RTA_PAYLOAD(tb[RTA_MULTIPATH]); + +@@ -628,10 +628,12 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) + if (nh->rtnh_len > len) + break; + if (r->rtm_flags&RTM_F_CLONED && r->rtm_type == RTN_MULTICAST) { +- if (first) ++ if (first) { + fprintf(fp, "Oifs: "); +- else ++ first = 0; ++ } else { + fprintf(fp, " "); ++ } + } else + fprintf(fp, "%s\tnexthop ", _SL_); + if (nh->rtnh_len > sizeof(*nh)) { +-- +2.20.1 + diff --git a/SOURCES/0087-lib-rt_names-Drop-dead-code-in-rtnl_rttable_n2a.patch b/SOURCES/0087-lib-rt_names-Drop-dead-code-in-rtnl_rttable_n2a.patch new file mode 100644 index 0000000..4f32b28 --- /dev/null +++ b/SOURCES/0087-lib-rt_names-Drop-dead-code-in-rtnl_rttable_n2a.patch @@ -0,0 +1,40 @@ +From cd4f6a9976a969d4981b3b3d09b60ed311f3e9a5 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:07:22 +0200 +Subject: [PATCH] lib/rt_names: Drop dead code in rtnl_rttable_n2a() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit b3c5f84493d33 + +commit b3c5f84493d3399a546566475203207aa5b64d54 +Author: Phil Sutter +Date: Thu Aug 17 19:09:28 2017 +0200 + + lib/rt_names: Drop dead code in rtnl_rttable_n2a() + + Since 'id' is 32bit unsigned, it can never exceed RT_TABLE_MAX (which is + defined to 0xFFFFFFFF). Therefore drop that never matching conditional. 
+ + Signed-off-by: Phil Sutter +--- + lib/rt_names.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/lib/rt_names.c b/lib/rt_names.c +index 04c15ff5b15f8..e5efd78e6f810 100644 +--- a/lib/rt_names.c ++++ b/lib/rt_names.c +@@ -410,10 +410,6 @@ const char *rtnl_rttable_n2a(__u32 id, char *buf, int len) + { + struct rtnl_hash_entry *entry; + +- if (id > RT_TABLE_MAX) { +- snprintf(buf, len, "%u", id); +- return buf; +- } + if (!rtnl_rttable_init) + rtnl_rttable_initialize(); + entry = rtnl_rttable_hash[id & 255]; +-- +2.20.1 + diff --git a/SOURCES/0088-ss-Skip-useless-check-in-parse_hostcond.patch b/SOURCES/0088-ss-Skip-useless-check-in-parse_hostcond.patch new file mode 100644 index 0000000..ce300e1 --- /dev/null +++ b/SOURCES/0088-ss-Skip-useless-check-in-parse_hostcond.patch @@ -0,0 +1,39 @@ +From 4b2f0a5a479f2714b8b44932ba961ba8cf07e18e Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:07:22 +0200 +Subject: [PATCH] ss: Skip useless check in parse_hostcond() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 44448a90eab34 + +commit 44448a90eab34713af019356926828720c67a268 +Author: Phil Sutter +Date: Thu Aug 17 19:09:29 2017 +0200 + + ss: Skip useless check in parse_hostcond() + + The passed 'addr' parameter is dereferenced by caller before and in + parse_hostcond() multiple times before this check, so assume it is + always true. 
+ + Signed-off-by: Phil Sutter +--- + misc/ss.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/misc/ss.c b/misc/ss.c +index c97f05a4c7033..38f4017e4a8c8 100644 +--- a/misc/ss.c ++++ b/misc/ss.c +@@ -1747,7 +1747,7 @@ void *parse_hostcond(char *addr, bool is_port) + } + } + } +- if (!is_port && addr && *addr && *addr != '*') { ++ if (!is_port && *addr && *addr != '*') { + if (get_prefix_1(&a.addr, addr, fam)) { + if (get_dns_host(&a, addr, fam)) { + fprintf(stderr, "Error: an inet prefix is expected rather than \"%s\".\n", addr); +-- +2.20.1 + diff --git a/SOURCES/0089-ss-Drop-useless-assignment.patch b/SOURCES/0089-ss-Drop-useless-assignment.patch new file mode 100644 index 0000000..c406ed9 --- /dev/null +++ b/SOURCES/0089-ss-Drop-useless-assignment.patch @@ -0,0 +1,36 @@ +From 23e13f60728a68b2c4a5b3656a1ce79affaafc6d Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:07:22 +0200 +Subject: [PATCH] ss: Drop useless assignment + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit e469523e8e8d1 + +commit e469523e8e8d1d31c3b35251105e2a843216d687 +Author: Phil Sutter +Date: Thu Aug 17 19:09:30 2017 +0200 + + ss: Drop useless assignment + + After '*b = *a', 'b->next' already has the same value as 'a->next'. 
+ + Signed-off-by: Phil Sutter +--- + misc/ss.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/misc/ss.c b/misc/ss.c +index 38f4017e4a8c8..cc38fc499c210 100644 +--- a/misc/ss.c ++++ b/misc/ss.c +@@ -1476,7 +1476,6 @@ static int remember_he(struct aafilter *a, struct hostent *he) + if ((b = malloc(sizeof(*b))) == NULL) + return cnt; + *b = *a; +- b->next = a->next; + a->next = b; + } + memcpy(b->addr.data, *ptr, len); +-- +2.20.1 + diff --git a/SOURCES/0090-tc-m_gact-Drop-dead-code.patch b/SOURCES/0090-tc-m_gact-Drop-dead-code.patch new file mode 100644 index 0000000..b3702ee --- /dev/null +++ b/SOURCES/0090-tc-m_gact-Drop-dead-code.patch @@ -0,0 +1,90 @@ +From c6e0fc7a5ec0b890c35a3b5d4cc5e1f7794cc47f Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:07:22 +0200 +Subject: [PATCH] tc/m_gact: Drop dead code + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 73aa988868e7e +Conflicts: context change due to missing commits: +* e67aba5595811 ("tc: actions: add helpers to parse and print control actions") +* 18f05d06016d9 ("tc: gact: fix control action parsing") + +commit 73aa988868e7e068b4fc0daaca7cfdb3e07fe744 +Author: Phil Sutter +Date: Thu Aug 17 19:09:31 2017 +0200 + + tc/m_gact: Drop dead code + + The use of 'ok' variable in parse_gact() is ineffective: The second + conditional increments it either if *argv is 'gact' or if + parse_action_control() doesn't fail (in which case exit() is called). + So this is effectively an unconditional increment and since no decrement + happens anywhere, all remaining checks for 'ok != 0' can be dropped. 
+ + Signed-off-by: Phil Sutter +--- + tc/m_gact.c | 18 +++++------------- + 1 file changed, 5 insertions(+), 13 deletions(-) + +diff --git a/tc/m_gact.c b/tc/m_gact.c +index 755a3bee2c2f2..0cb5222fd3817 100644 +--- a/tc/m_gact.c ++++ b/tc/m_gact.c +@@ -86,7 +86,6 @@ parse_gact(struct action_util *a, int *argc_p, char ***argv_p, + { + int argc = *argc_p; + char **argv = *argv_p; +- int ok = 0; + int action = TC_POLICE_RECLASSIFY; + struct tc_gact p = { .action = TC_POLICE_RECLASSIFY }; + #ifdef CONFIG_GACT_PROB +@@ -100,25 +99,22 @@ parse_gact(struct action_util *a, int *argc_p, char ***argv_p, + + + if (matches(*argv, "gact") == 0) { +- ok++; ++ argc--; ++ argv++; + } else { + action = get_act(&argv); + if (action != -10) { + p.action = action; +- ok++; ++ argc--; ++ argv++; + } else { + explain(); + return action; + } + } + +- if (ok) { +- argc--; +- argv++; +- } +- + #ifdef CONFIG_GACT_PROB +- if (ok && argc > 0) { ++ if (argc > 0) { + if (matches(*argv, "random") == 0) { + rd = 1; + NEXT_ARG(); +@@ -167,15 +163,11 @@ parse_gact(struct action_util *a, int *argc_p, char ***argv_p, + } + argc--; + argv++; +- ok++; + } else if (matches(*argv, "help") == 0) { + usage(); + } + } + +- if (!ok) +- return -1; +- + tail = NLMSG_TAIL(n); + addattr_l(n, MAX_MSG, tca_id, NULL, 0); + addattr_l(n, MAX_MSG, TCA_GACT_PARMS, &p, sizeof(p)); +-- +2.20.1 + diff --git a/SOURCES/0091-ipaddress-Avoid-accessing-uninitialized-variable-lcl.patch b/SOURCES/0091-ipaddress-Avoid-accessing-uninitialized-variable-lcl.patch new file mode 100644 index 0000000..fcf2770 --- /dev/null +++ b/SOURCES/0091-ipaddress-Avoid-accessing-uninitialized-variable-lcl.patch @@ -0,0 +1,38 @@ +From a7150dc1d46b73f65bfedd728aeca1dcf5ec20eb Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:07 +0200 +Subject: [PATCH] ipaddress: Avoid accessing uninitialized variable lcl + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit d044ea3e784d1 + 
+commit d044ea3e784d1a4f0a61f306b86ce95c9a26b0b5 +Author: Phil Sutter +Date: Mon Aug 21 11:26:59 2017 +0200 + + ipaddress: Avoid accessing uninitialized variable lcl + + If no address was given, ipaddr_modify() accesses uninitialized data + when assigning to req.ifa.ifa_prefixlen. + + Signed-off-by: Phil Sutter +--- + ip/ipaddress.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/ip/ipaddress.c b/ip/ipaddress.c +index bed2d3801809b..2c27da3a1f079 100644 +--- a/ip/ipaddress.c ++++ b/ip/ipaddress.c +@@ -1887,7 +1887,7 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv) + char *lcl_arg = NULL; + char *valid_lftp = NULL; + char *preferred_lftp = NULL; +- inet_prefix lcl; ++ inet_prefix lcl = {}; + inet_prefix peer; + int local_len = 0; + int peer_len = 0; +-- +2.20.1 + diff --git a/SOURCES/0092-iplink_can-Prevent-overstepping-array-bounds.patch b/SOURCES/0092-iplink_can-Prevent-overstepping-array-bounds.patch new file mode 100644 index 0000000..0804c5b --- /dev/null +++ b/SOURCES/0092-iplink_can-Prevent-overstepping-array-bounds.patch @@ -0,0 +1,49 @@ +From 4c775c035e2751b1aec52dcc2ca0e4fc99bac793 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:07 +0200 +Subject: [PATCH] iplink_can: Prevent overstepping array bounds + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 258b7c0fa70c2 + +commit 258b7c0fa70c2d6b5f9776cc35c38c80b4ee5752 +Author: Phil Sutter +Date: Mon Aug 21 11:27:00 2017 +0200 + + iplink_can: Prevent overstepping array bounds + + can_state_names array contains at most CAN_STATE_MAX fields, so allowing + an index to it to be equal to that number is wrong. While here, also + make sure the array is indeed that big so nothing bad happens if + CAN_STATE_MAX ever increases. 
+ + Signed-off-by: Phil Sutter +--- + ip/iplink_can.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/ip/iplink_can.c b/ip/iplink_can.c +index 20d4d37d0d087..4133a658a059e 100644 +--- a/ip/iplink_can.c ++++ b/ip/iplink_can.c +@@ -241,7 +241,7 @@ static int can_parse_opt(struct link_util *lu, int argc, char **argv, + return 0; + } + +-static const char *can_state_names[] = { ++static const char *can_state_names[CAN_STATE_MAX] = { + [CAN_STATE_ERROR_ACTIVE] = "ERROR-ACTIVE", + [CAN_STATE_ERROR_WARNING] = "ERROR-WARNING", + [CAN_STATE_ERROR_PASSIVE] = "ERROR-PASSIVE", +@@ -265,7 +265,7 @@ static void can_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) + if (tb[IFLA_CAN_STATE]) { + uint32_t state = rta_getattr_u32(tb[IFLA_CAN_STATE]); + +- fprintf(f, "state %s ", state <= CAN_STATE_MAX ? ++ fprintf(f, "state %s ", state < CAN_STATE_MAX ? + can_state_names[state] : "UNKNOWN"); + } + +-- +2.20.1 + diff --git a/SOURCES/0093-ipmaddr-Avoid-accessing-uninitialized-data.patch b/SOURCES/0093-ipmaddr-Avoid-accessing-uninitialized-data.patch new file mode 100644 index 0000000..03b06ec --- /dev/null +++ b/SOURCES/0093-ipmaddr-Avoid-accessing-uninitialized-data.patch @@ -0,0 +1,38 @@ +From e5d32611010d4694562980b790ed7849342f594b Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:07 +0200 +Subject: [PATCH] ipmaddr: Avoid accessing uninitialized data + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit b48a1161f5f9b + +commit b48a1161f5f9b6a0cda399a224bbbf72eba4a5c6 +Author: Phil Sutter +Date: Mon Aug 21 11:27:01 2017 +0200 + + ipmaddr: Avoid accessing uninitialized data + + Looks like this can only happen if /proc/net/igmp is malformed, but + better be sure. 
+ + Signed-off-by: Phil Sutter +--- + ip/ipmaddr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/ip/ipmaddr.c b/ip/ipmaddr.c +index 4f726fdd976f1..85a69e779563d 100644 +--- a/ip/ipmaddr.c ++++ b/ip/ipmaddr.c +@@ -136,7 +136,7 @@ static void read_igmp(struct ma_info **result_p) + + while (fgets(buf, sizeof(buf), fp)) { + struct ma_info *ma; +- size_t len; ++ size_t len = 0; + + if (buf[0] != '\t') { + sscanf(buf, "%d%s", &m.index, m.name); +-- +2.20.1 + diff --git a/SOURCES/0094-ss-Use-C99-initializer-in-netlink_show_one.patch b/SOURCES/0094-ss-Use-C99-initializer-in-netlink_show_one.patch new file mode 100644 index 0000000..4914cd9 --- /dev/null +++ b/SOURCES/0094-ss-Use-C99-initializer-in-netlink_show_one.patch @@ -0,0 +1,54 @@ +From 60a5c300c8cd0fbd4c378900d298eb2444c0343d Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:07 +0200 +Subject: [PATCH] ss: Use C99 initializer in netlink_show_one() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 301826beb3baa + +commit 301826beb3baa902e2057d81912d1586459f605f +Author: Phil Sutter +Date: Mon Aug 21 11:27:02 2017 +0200 + + ss: Use C99 initializer in netlink_show_one() + + This has the additional benefit of initializing st.ino to zero which is + used later in is_sctp_assoc() function. 
+ + Signed-off-by: Phil Sutter +--- + misc/ss.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/misc/ss.c b/misc/ss.c +index cc38fc499c210..7a38e9d830e8d 100644 +--- a/misc/ss.c ++++ b/misc/ss.c +@@ -3567,17 +3567,18 @@ static int netlink_show_one(struct filter *f, + int rq, int wq, + unsigned long long sk, unsigned long long cb) + { +- struct sockstat st; ++ struct sockstat st = { ++ .state = SS_CLOSE, ++ .rq = rq, ++ .wq = wq, ++ .local.family = AF_NETLINK, ++ .remote.family = AF_NETLINK, ++ }; + + SPRINT_BUF(prot_buf) = {}; + const char *prot_name; + char procname[64] = {}; + +- st.state = SS_CLOSE; +- st.rq = rq; +- st.wq = wq; +- st.local.family = st.remote.family = AF_NETLINK; +- + if (f->f) { + st.rport = -1; + st.lport = pid; +-- +2.20.1 + diff --git a/SOURCES/0095-netem-maketable-Check-return-value-of-fstat.patch b/SOURCES/0095-netem-maketable-Check-return-value-of-fstat.patch new file mode 100644 index 0000000..37c71a3 --- /dev/null +++ b/SOURCES/0095-netem-maketable-Check-return-value-of-fstat.patch @@ -0,0 +1,39 @@ +From 56c1a9e6c4d7d54ee27472428bcb33be471b3346 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:07 +0200 +Subject: [PATCH] netem/maketable: Check return value of fstat() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit d304b05c12b3a + +commit d304b05c12b3a0247b627ebc8e4477520bb4b969 +Author: Phil Sutter +Date: Mon Aug 21 11:27:03 2017 +0200 + + netem/maketable: Check return value of fstat() + + Otherwise info.st_size may contain garbage. 
+ + Signed-off-by: Phil Sutter +--- + netem/maketable.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/netem/maketable.c b/netem/maketable.c +index 6aff927be7040..ad660e7d457f0 100644 +--- a/netem/maketable.c ++++ b/netem/maketable.c +@@ -24,8 +24,8 @@ readdoubles(FILE *fp, int *number) + int limit; + int n=0, i; + +- fstat(fileno(fp), &info); +- if (info.st_size > 0) { ++ if (!fstat(fileno(fp), &info) && ++ info.st_size > 0) { + limit = 2*info.st_size/sizeof(double); /* @@ approximate */ + } else { + limit = 10000; +-- +2.20.1 + diff --git a/SOURCES/0096-tc-q_multiq-Don-t-pass-garbage-in-TCA_OPTIONS.patch b/SOURCES/0096-tc-q_multiq-Don-t-pass-garbage-in-TCA_OPTIONS.patch new file mode 100644 index 0000000..eb63ef2 --- /dev/null +++ b/SOURCES/0096-tc-q_multiq-Don-t-pass-garbage-in-TCA_OPTIONS.patch @@ -0,0 +1,38 @@ +From 8a115584261b32308d604063b56f25330ce8adaf Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:07 +0200 +Subject: [PATCH] tc/q_multiq: Don't pass garbage in TCA_OPTIONS + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 82ed9ffa2bb86 + +commit 82ed9ffa2bb86eea653f68a0ade945b7708818c9 +Author: Phil Sutter +Date: Mon Aug 21 11:27:04 2017 +0200 + + tc/q_multiq: Don't pass garbage in TCA_OPTIONS + + multiq_parse_opt() doesn't change 'opt' at all. So at least make sure + it doesn't fill TCA_OPTIONS attribute with garbage from stack. 
+ + Signed-off-by: Phil Sutter +--- + tc/q_multiq.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tc/q_multiq.c b/tc/q_multiq.c +index 7823931494563..9c09c9a7748f6 100644 +--- a/tc/q_multiq.c ++++ b/tc/q_multiq.c +@@ -43,7 +43,7 @@ static void explain(void) + static int multiq_parse_opt(struct qdisc_util *qu, int argc, char **argv, + struct nlmsghdr *n) + { +- struct tc_multiq_qopt opt; ++ struct tc_multiq_qopt opt = {}; + + if (argc) { + if (strcmp(*argv, "help") == 0) { +-- +2.20.1 + diff --git a/SOURCES/0097-iproute-Check-mark-value-input.patch b/SOURCES/0097-iproute-Check-mark-value-input.patch new file mode 100644 index 0000000..73c7ae8 --- /dev/null +++ b/SOURCES/0097-iproute-Check-mark-value-input.patch @@ -0,0 +1,46 @@ +From ce3460e0a39948054139e5bcf72130e82bf2da8d Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:07 +0200 +Subject: [PATCH] iproute: Check mark value input + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 7c66d89828a6e + +commit 7c66d89828a6ee4c5a4e3f48ef4a4cb07b50013d +Author: Phil Sutter +Date: Mon Aug 21 18:36:50 2017 +0200 + + iproute: Check mark value input + + Signed-off-by: Phil Sutter +--- + ip/iproute.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/ip/iproute.c b/ip/iproute.c +index 6ebc6214c45ee..1d92530fd3421 100644 +--- a/ip/iproute.c ++++ b/ip/iproute.c +@@ -1481,7 +1481,8 @@ static int iproute_list_flush_or_save(int argc, char **argv, int action) + id = *argv; + } else if (strcmp(*argv, "mark") == 0) { + NEXT_ARG(); +- get_unsigned(&mark, *argv, 0); ++ if (get_unsigned(&mark, *argv, 0)) ++ invarg("invalid mark value", *argv); + filter.markmask = -1; + } else if (strcmp(*argv, "via") == 0) { + int family; +@@ -1698,7 +1699,8 @@ static int iproute_get(int argc, char **argv) + idev = *argv; + } else if (matches(*argv, "mark") == 0) { + NEXT_ARG(); +- get_unsigned(&mark, *argv, 0); ++ if 
(get_unsigned(&mark, *argv, 0)) ++ invarg("invalid mark value", *argv); + } else if (matches(*argv, "oif") == 0 || + strcmp(*argv, "dev") == 0) { + NEXT_ARG(); +-- +2.20.1 + diff --git a/SOURCES/0098-iplink_vrf-Complain-if-main-table-is-not-found.patch b/SOURCES/0098-iplink_vrf-Complain-if-main-table-is-not-found.patch new file mode 100644 index 0000000..e42872e --- /dev/null +++ b/SOURCES/0098-iplink_vrf-Complain-if-main-table-is-not-found.patch @@ -0,0 +1,38 @@ +From b9d228a8c22f1d9069fa0c8f98e6bd94011c5714 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:07 +0200 +Subject: [PATCH] iplink_vrf: Complain if main table is not found + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 84b6a3f4b5720 + +commit 84b6a3f4b5720aaf673c2eaad2cf60f786de077b +Author: Phil Sutter +Date: Mon Aug 21 18:36:51 2017 +0200 + + iplink_vrf: Complain if main table is not found + + Signed-off-by: Phil Sutter + Acked-by: David Ahern +--- + ip/iplink_vrf.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/ip/iplink_vrf.c b/ip/iplink_vrf.c +index 370bb86815a80..9c2de2732a88e 100644 +--- a/ip/iplink_vrf.c ++++ b/ip/iplink_vrf.c +@@ -127,7 +127,9 @@ __u32 ipvrf_get_table(const char *name) + if (rtnl_talk_suppress_rtnl_errmsg(&rth, &req.n, &answer) < 0) { + /* special case "default" vrf to be the main table */ + if (errno == ENODEV && !strcmp(name, "default")) +- rtnl_rttable_a2n(&tb_id, "main"); ++ if (rtnl_rttable_a2n(&tb_id, "main")) ++ fprintf(stderr, ++ "BUG: RTTable \"main\" not found.\n"); + + return tb_id; + } +-- +2.20.1 + diff --git a/SOURCES/0099-devlink-Check-return-code-of-strslashrsplit.patch b/SOURCES/0099-devlink-Check-return-code-of-strslashrsplit.patch new file mode 100644 index 0000000..4aa717f --- /dev/null +++ b/SOURCES/0099-devlink-Check-return-code-of-strslashrsplit.patch @@ -0,0 +1,62 @@ +From ea11b95042171f254fe0127ea0f1f2786d81dc83 Mon Sep 17 00:00:00 2001 +From: 
Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:07 +0200 +Subject: [PATCH] devlink: Check return code of strslashrsplit() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 6e33f7b0f6e04 + +commit 6e33f7b0f6e04dd46bea24c3ab28d61e54625dd7 +Author: Phil Sutter +Date: Mon Aug 21 18:36:52 2017 +0200 + + devlink: Check return code of strslashrsplit() + + This function shouldn't fail because all callers of + __dl_argv_handle_port() make sure the passed string contains enough + slashes already, but better make sure if this changes in future the + function won't access uninitialized data. + + Signed-off-by: Phil Sutter +--- + devlink/devlink.c | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +diff --git a/devlink/devlink.c b/devlink/devlink.c +index ae295b5632e8c..082eeafa1146a 100644 +--- a/devlink/devlink.c ++++ b/devlink/devlink.c +@@ -576,18 +576,26 @@ static int __dl_argv_handle_port(char *str, + char **p_bus_name, char **p_dev_name, + uint32_t *p_port_index) + { +- char *handlestr = handlestr; +- char *portstr = portstr; ++ char *handlestr; ++ char *portstr; + int err; + +- strslashrsplit(str, &handlestr, &portstr); ++ err = strslashrsplit(str, &handlestr, &portstr); ++ if (err) { ++ pr_err("Port identification \"%s\" is invalid\n", str); ++ return err; ++ } + err = strtouint32_t(portstr, p_port_index); + if (err) { + pr_err("Port index \"%s\" is not a number or not within range\n", + portstr); + return err; + } +- strslashrsplit(handlestr, p_bus_name, p_dev_name); ++ err = strslashrsplit(handlestr, p_bus_name, p_dev_name); ++ if (err) { ++ pr_err("Port identification \"%s\" is invalid\n", str); ++ return err; ++ } + return 0; + } + +-- +2.20.1 + diff --git a/SOURCES/0100-lib-bpf-Don-t-leak-fp-in-bpf_find_mntpt.patch b/SOURCES/0100-lib-bpf-Don-t-leak-fp-in-bpf_find_mntpt.patch new file mode 100644 index 0000000..2e22047 --- /dev/null +++ 
b/SOURCES/0100-lib-bpf-Don-t-leak-fp-in-bpf_find_mntpt.patch @@ -0,0 +1,43 @@ +From bafabe7a067e647f97ae0df277bded8b9349db50 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:07 +0200 +Subject: [PATCH] lib/bpf: Don't leak fp in bpf_find_mntpt() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit c3724e4bc3a6c + +commit c3724e4bc3a6c40dc846f0c3b02934d711bf81fb +Author: Phil Sutter +Date: Mon Aug 21 16:46:51 2017 +0200 + + lib/bpf: Don't leak fp in bpf_find_mntpt() + + If fopen() succeeded but len != PATH_MAX, the function leaks the open + FILE pointer. Fix this by checking len value before calling fopen(). + + Signed-off-by: Phil Sutter + Acked-by: Daniel Borkmann +--- + lib/bpf.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/lib/bpf.c b/lib/bpf.c +index 3aabf44d1abf8..33c5288e82187 100644 +--- a/lib/bpf.c ++++ b/lib/bpf.c +@@ -432,8 +432,11 @@ static const char *bpf_find_mntpt(const char *fstype, unsigned long magic, + } + } + ++ if (len != PATH_MAX) ++ return NULL; ++ + fp = fopen("/proc/mounts", "r"); +- if (fp == NULL || len != PATH_MAX) ++ if (fp == NULL) + return NULL; + + while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n", +-- +2.20.1 + diff --git a/SOURCES/0101-ifstat-nstat-Check-fdopen-return-value.patch b/SOURCES/0101-ifstat-nstat-Check-fdopen-return-value.patch new file mode 100644 index 0000000..792575a --- /dev/null +++ b/SOURCES/0101-ifstat-nstat-Check-fdopen-return-value.patch @@ -0,0 +1,84 @@ +From 5ae0f31d9c5d40dbf9eaf00435b9df1968109f5e Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:07 +0200 +Subject: [PATCH] ifstat, nstat: Check fdopen() return value + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 6d02518fdc37e + +commit 6d02518fdc37eb12abff67b6f8c741fbd81dce72 +Author: Phil Sutter +Date: Thu Aug 24 11:46:31 2017 +0200 + + ifstat, nstat: Check 
fdopen() return value + + Prevent passing NULL FILE pointer to fgets() later. + + Fix both tools in a single patch since the code changes are basically + identical. + + Signed-off-by: Phil Sutter +--- + misc/ifstat.c | 16 +++++++++++----- + misc/nstat.c | 16 +++++++++++----- + 2 files changed, 22 insertions(+), 10 deletions(-) + +diff --git a/misc/ifstat.c b/misc/ifstat.c +index 1be21703bf14c..ac3eff6b870a9 100644 +--- a/misc/ifstat.c ++++ b/misc/ifstat.c +@@ -992,12 +992,18 @@ int main(int argc, char *argv[]) + && verify_forging(fd) == 0) { + FILE *sfp = fdopen(fd, "r"); + +- load_raw_table(sfp); +- if (hist_db && source_mismatch) { +- fprintf(stderr, "ifstat: history is stale, ignoring it.\n"); +- hist_db = NULL; ++ if (!sfp) { ++ fprintf(stderr, "ifstat: fdopen failed: %s\n", ++ strerror(errno)); ++ close(fd); ++ } else { ++ load_raw_table(sfp); ++ if (hist_db && source_mismatch) { ++ fprintf(stderr, "ifstat: history is stale, ignoring it.\n"); ++ hist_db = NULL; ++ } ++ fclose(sfp); + } +- fclose(sfp); + } else { + if (fd >= 0) + close(fd); +diff --git a/misc/nstat.c b/misc/nstat.c +index 1212b1f2c8128..a4dd405d43a93 100644 +--- a/misc/nstat.c ++++ b/misc/nstat.c +@@ -706,12 +706,18 @@ int main(int argc, char *argv[]) + && verify_forging(fd) == 0) { + FILE *sfp = fdopen(fd, "r"); + +- load_good_table(sfp); +- if (hist_db && source_mismatch) { +- fprintf(stderr, "nstat: history is stale, ignoring it.\n"); +- hist_db = NULL; ++ if (!sfp) { ++ fprintf(stderr, "nstat: fdopen failed: %s\n", ++ strerror(errno)); ++ close(fd); ++ } else { ++ load_good_table(sfp); ++ if (hist_db && source_mismatch) { ++ fprintf(stderr, "nstat: history is stale, ignoring it.\n"); ++ hist_db = NULL; ++ } ++ fclose(sfp); + } +- fclose(sfp); + } else { + if (fd >= 0) + close(fd); +-- +2.20.1 + diff --git a/SOURCES/0102-tc-q_netem-Don-t-dereference-possibly-NULL-pointer.patch b/SOURCES/0102-tc-q_netem-Don-t-dereference-possibly-NULL-pointer.patch new file mode 100644 index 0000000..4b35e5f 
--- /dev/null +++ b/SOURCES/0102-tc-q_netem-Don-t-dereference-possibly-NULL-pointer.patch @@ -0,0 +1,46 @@ +From a7329f9d8681bdbd2d8257b152ae6b4959232e67 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:08 +0200 +Subject: [PATCH] tc/q_netem: Don't dereference possibly NULL pointer + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit a754de3ccd937 + +commit a754de3ccd937500940c6fcd0ad043855f56862d +Author: Phil Sutter +Date: Thu Aug 24 11:46:32 2017 +0200 + + tc/q_netem: Don't dereference possibly NULL pointer + + Assuming 'opt' might be NULL, move the call to RTA_PAYLOAD to after the + check since it dereferences its parameter. + + Signed-off-by: Phil Sutter +--- + tc/q_netem.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tc/q_netem.c b/tc/q_netem.c +index 0975ae111de97..5a9e747411e85 100644 +--- a/tc/q_netem.c ++++ b/tc/q_netem.c +@@ -538,7 +538,7 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) + int *ecn = NULL; + struct tc_netem_qopt qopt; + const struct tc_netem_rate *rate = NULL; +- int len = RTA_PAYLOAD(opt) - sizeof(qopt); ++ int len; + __u64 rate64 = 0; + + SPRINT_BUF(b1); +@@ -546,6 +546,7 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) + if (opt == NULL) + return 0; + ++ len = RTA_PAYLOAD(opt) - sizeof(qopt); + if (len < 0) { + fprintf(stderr, "options size error\n"); + return -1; +-- +2.20.1 + diff --git a/SOURCES/0103-tc-tc_filter-Make-sure-filter-name-is-not-empty.patch b/SOURCES/0103-tc-tc_filter-Make-sure-filter-name-is-not-empty.patch new file mode 100644 index 0000000..d74eae8 --- /dev/null +++ b/SOURCES/0103-tc-tc_filter-Make-sure-filter-name-is-not-empty.patch @@ -0,0 +1,39 @@ +From a4c190565a85db814ad1185ada5382e7fb8707a0 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:08 +0200 +Subject: [PATCH] tc/tc_filter: Make sure filter name is not empty + 
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 75716932a0af2 + +commit 75716932a0af28da207aa57c212794ab28ce9036 +Author: Phil Sutter +Date: Thu Aug 24 11:46:33 2017 +0200 + + tc/tc_filter: Make sure filter name is not empty + + The later check for 'k[0] != 0' requires a non-empty filter name, + otherwise NULL pointer dereference in 'q' might happen. + + Signed-off-by: Phil Sutter +--- + tc/tc_filter.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/tc/tc_filter.c b/tc/tc_filter.c +index e640492b25ba6..a6bb73d12eaba 100644 +--- a/tc/tc_filter.c ++++ b/tc/tc_filter.c +@@ -380,6 +380,9 @@ static int tc_filter_get(int cmd, unsigned int flags, int argc, char **argv) + usage(); + return 0; + } else { ++ if (!**argv) ++ invarg("invalid filter name", *argv); ++ + strncpy(k, *argv, sizeof(k)-1); + + q = get_filter_kind(k); +-- +2.20.1 + diff --git a/SOURCES/0104-tipc-bearer-Prevent-NULL-pointer-dereference.patch b/SOURCES/0104-tipc-bearer-Prevent-NULL-pointer-dereference.patch new file mode 100644 index 0000000..d4530f3 --- /dev/null +++ b/SOURCES/0104-tipc-bearer-Prevent-NULL-pointer-dereference.patch @@ -0,0 +1,35 @@ +From 73b2d3ee4bbdbfba7db035d9b89a2bcffc15e1ba Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:08 +0200 +Subject: [PATCH] tipc/bearer: Prevent NULL pointer dereference + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 70a6df3962b84 + +commit 70a6df3962b8448fc9c28d72606828a004ed5b6b +Author: Phil Sutter +Date: Thu Aug 24 11:46:34 2017 +0200 + + tipc/bearer: Prevent NULL pointer dereference + + Signed-off-by: Phil Sutter +--- + tipc/bearer.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tipc/bearer.c b/tipc/bearer.c +index c3d4491f8f6ef..0d84570150624 100644 +--- a/tipc/bearer.c ++++ b/tipc/bearer.c +@@ -439,7 +439,7 @@ static int cmd_bearer_enable(struct nlmsghdr *nlh, const struct cmd *cmd, + 
return err; + + opt = get_opt(opts, "media"); +- if (strcmp(opt->val, "udp") == 0) { ++ if (opt && strcmp(opt->val, "udp") == 0) { + err = nl_add_udp_enable_opts(nlh, opts, cmdl); + if (err) + return err; +-- +2.20.1 + diff --git a/SOURCES/0105-ipntable-Avoid-memory-allocation-for-filter.name.patch b/SOURCES/0105-ipntable-Avoid-memory-allocation-for-filter.name.patch new file mode 100644 index 0000000..da866f9 --- /dev/null +++ b/SOURCES/0105-ipntable-Avoid-memory-allocation-for-filter.name.patch @@ -0,0 +1,58 @@ +From c176919cbf8f11f666c2281785e58fd147ecfea0 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:08 +0200 +Subject: [PATCH] ipntable: Avoid memory allocation for filter.name + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 45c2ec9e95fef + +commit 45c2ec9e95fef8eb6f0807d9a7e5f14c14313c7e +Author: Phil Sutter +Date: Thu Aug 24 11:51:45 2017 +0200 + + ipntable: Avoid memory allocation for filter.name + + The original issue was that filter.name might end up unterminated if + user provided string was too long. But in fact it is not necessary to + copy the commandline parameter at all: just make filter.name point to it + instead. 
+ + Signed-off-by: Phil Sutter +--- + ip/ipntable.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/ip/ipntable.c b/ip/ipntable.c +index ae8c74ead2cb8..2f72c989f35df 100644 +--- a/ip/ipntable.c ++++ b/ip/ipntable.c +@@ -37,7 +37,7 @@ static struct + int family; + int index; + #define NONE_DEV (-1) +- char name[1024]; ++ const char *name; + } filter; + + static void usage(void) __attribute__((noreturn)); +@@ -367,7 +367,7 @@ static int print_ntable(const struct sockaddr_nl *who, struct nlmsghdr *n, void + if (tb[NDTA_NAME]) { + const char *name = rta_getattr_str(tb[NDTA_NAME]); + +- if (strlen(filter.name) > 0 && strcmp(filter.name, name)) ++ if (filter.name && strcmp(filter.name, name)) + return 0; + } + if (tb[NDTA_PARMS]) { +@@ -631,7 +631,7 @@ static int ipntable_show(int argc, char **argv) + } else if (strcmp(*argv, "name") == 0) { + NEXT_ARG(); + +- strncpy(filter.name, *argv, sizeof(filter.name)); ++ filter.name = *argv; + } else + invarg("unknown", *argv); + +-- +2.20.1 + diff --git a/SOURCES/0106-lib-fs-Fix-format-string-in-find_fs_mount.patch b/SOURCES/0106-lib-fs-Fix-format-string-in-find_fs_mount.patch new file mode 100644 index 0000000..59d8032 --- /dev/null +++ b/SOURCES/0106-lib-fs-Fix-format-string-in-find_fs_mount.patch @@ -0,0 +1,39 @@ +From dfc6dc25fcc666ed3fa938bca5ccd87d6cf4a99e Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:08 +0200 +Subject: [PATCH] lib/fs: Fix format string in find_fs_mount() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit eab450789829e + +commit eab450789829e33a64dbd08dced3438d580d5179 +Author: Phil Sutter +Date: Thu Aug 24 11:51:46 2017 +0200 + + lib/fs: Fix format string in find_fs_mount() + + A field width of 4096 allows fscanf() to store that amount of characters + into the given buffer, though that doesn't include the terminating NULL + byte. Decrease the value by one to leave space for it. 
+ + Signed-off-by: Phil Sutter +--- + lib/fs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/fs.c b/lib/fs.c +index c59ac564581d0..1ff881ecfcd8c 100644 +--- a/lib/fs.c ++++ b/lib/fs.c +@@ -45,7 +45,7 @@ static char *find_fs_mount(const char *fs_to_find) + return NULL; + } + +- while (fscanf(fp, "%*s %4096s %127s %*s %*d %*d\n", ++ while (fscanf(fp, "%*s %4095s %127s %*s %*d %*d\n", + path, fstype) == 2) { + if (strcmp(fstype, fs_to_find) == 0) { + mnt = strdup(path); +-- +2.20.1 + diff --git a/SOURCES/0107-lib-inet_proto-Review-inet_proto_-a2n-n2a.patch b/SOURCES/0107-lib-inet_proto-Review-inet_proto_-a2n-n2a.patch new file mode 100644 index 0000000..3bce03d --- /dev/null +++ b/SOURCES/0107-lib-inet_proto-Review-inet_proto_-a2n-n2a.patch @@ -0,0 +1,90 @@ +From e47b57df11565c51b9d8a5307a63d93f8e9a061b Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:08 +0200 +Subject: [PATCH] lib/inet_proto: Review inet_proto_{a2n,n2a}() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit cfda500a7d808 + +commit cfda500a7d808a6e0f3eca47abd75c22cfe716e5 +Author: Phil Sutter +Date: Thu Aug 24 11:51:47 2017 +0200 + + lib/inet_proto: Review inet_proto_{a2n,n2a}() + + The original intent was to make sure strings written by those functions + are NUL-terminated at all times, though it was suggested to get rid of + the 15 char protocol name limit as well which this patch accomplishes. + + In addition to that, simplify inet_proto_a2n() a bit: Use the error + checking in get_u8() to find out whether passed 'buf' contains a valid + decimal number instead of checking the first character's value manually. 
+ + Signed-off-by: Phil Sutter +--- + lib/inet_proto.c | 24 +++++++++++++----------- + 1 file changed, 13 insertions(+), 11 deletions(-) + +diff --git a/lib/inet_proto.c b/lib/inet_proto.c +index ceda082b12a2e..53c029039b6d5 100644 +--- a/lib/inet_proto.c ++++ b/lib/inet_proto.c +@@ -25,7 +25,7 @@ + + const char *inet_proto_n2a(int proto, char *buf, int len) + { +- static char ncache[16]; ++ static char *ncache; + static int icache = -1; + struct protoent *pe; + +@@ -34,9 +34,12 @@ const char *inet_proto_n2a(int proto, char *buf, int len) + + pe = getprotobynumber(proto); + if (pe) { ++ if (icache != -1) ++ free(ncache); + icache = proto; +- strncpy(ncache, pe->p_name, 16); +- strncpy(buf, pe->p_name, len); ++ ncache = strdup(pe->p_name); ++ strncpy(buf, pe->p_name, len - 1); ++ buf[len - 1] = '\0'; + return buf; + } + snprintf(buf, len, "ipproto-%d", proto); +@@ -45,24 +48,23 @@ const char *inet_proto_n2a(int proto, char *buf, int len) + + int inet_proto_a2n(const char *buf) + { +- static char ncache[16]; ++ static char *ncache; + static int icache = -1; + struct protoent *pe; ++ __u8 ret; + +- if (icache>=0 && strcmp(ncache, buf) == 0) ++ if (icache != -1 && strcmp(ncache, buf) == 0) + return icache; + +- if (buf[0] >= '0' && buf[0] <= '9') { +- __u8 ret; +- if (get_u8(&ret, buf, 10)) +- return -1; ++ if (!get_u8(&ret, buf, 10)) + return ret; +- } + + pe = getprotobyname(buf); + if (pe) { ++ if (icache != -1) ++ free(ncache); + icache = pe->p_proto; +- strncpy(ncache, pe->p_name, 16); ++ ncache = strdup(pe->p_name); + return pe->p_proto; + } + return -1; +-- +2.20.1 + diff --git a/SOURCES/0108-lnstat_util-Simplify-alloc_and_open-a-bit.patch b/SOURCES/0108-lnstat_util-Simplify-alloc_and_open-a-bit.patch new file mode 100644 index 0000000..4a7541c --- /dev/null +++ b/SOURCES/0108-lnstat_util-Simplify-alloc_and_open-a-bit.patch @@ -0,0 +1,44 @@ +From 5cdf4d78d15b127d0f4a7a09e4700d7df16dda19 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 
20:08:08 +0200 +Subject: [PATCH] lnstat_util: Simplify alloc_and_open() a bit + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit bc27878d21909 + +commit bc27878d21909b110dd21eea0c3505d023f29dc2 +Author: Phil Sutter +Date: Thu Aug 24 11:51:48 2017 +0200 + + lnstat_util: Simplify alloc_and_open() a bit + + Relying upon callers and using unsafe strcpy() is probably not the best + idea. Aside from that, using snprintf() allows to format the string for + lf->path in one go. + + Signed-off-by: Phil Sutter +--- + misc/lnstat_util.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +diff --git a/misc/lnstat_util.c b/misc/lnstat_util.c +index cc54598fe1bef..ec19238c24b94 100644 +--- a/misc/lnstat_util.c ++++ b/misc/lnstat_util.c +@@ -180,11 +180,8 @@ static struct lnstat_file *alloc_and_open(const char *path, const char *file) + } + + /* initialize */ +- /* de->d_name is guaranteed to be <= NAME_MAX */ +- strcpy(lf->basename, file); +- strcpy(lf->path, path); +- strcat(lf->path, "/"); +- strcat(lf->path, lf->basename); ++ snprintf(lf->basename, sizeof(lf->basename), "%s", file); ++ snprintf(lf->path, sizeof(lf->path), "%s/%s", path, file); + + /* initialize to default */ + lf->interval.tv_sec = 1; +-- +2.20.1 + diff --git a/SOURCES/0109-tc-m_xt-Fix-for-potential-string-buffer-overflows.patch b/SOURCES/0109-tc-m_xt-Fix-for-potential-string-buffer-overflows.patch new file mode 100644 index 0000000..c411268 --- /dev/null +++ b/SOURCES/0109-tc-m_xt-Fix-for-potential-string-buffer-overflows.patch @@ -0,0 +1,55 @@ +From 8ac8129d710b8a084ce213791874330aa30ec70e Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:08 +0200 +Subject: [PATCH] tc/m_xt: Fix for potential string buffer overflows + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 56270e54661e8 + +commit 56270e54661e8ca51d4b3661b9f9bb12a0a40d95 +Author: Phil Sutter +Date: Thu Aug 24 
11:51:49 2017 +0200 + + tc/m_xt: Fix for potential string buffer overflows + + - Use strncpy() when writing to target->t->u.user.name and make sure the + final byte remains untouched (xtables_calloc() set it to zero). + - 'tname' length sanitization was completely wrong: If it's length + exceeded the 16 bytes available in 'k', passing a length value of 16 + to strncpy() would overwrite the previously NULL'ed 'k[15]'. Also, the + sanitization has to happen if 'tname' is exactly 16 bytes long as + well. + + Signed-off-by: Phil Sutter +--- + tc/m_xt.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/tc/m_xt.c b/tc/m_xt.c +index ad52d239caf61..9218b14594403 100644 +--- a/tc/m_xt.c ++++ b/tc/m_xt.c +@@ -95,7 +95,8 @@ build_st(struct xtables_target *target, struct xt_entry_target *t) + if (t == NULL) { + target->t = xtables_calloc(1, size); + target->t->u.target_size = size; +- strcpy(target->t->u.user.name, target->name); ++ strncpy(target->t->u.user.name, target->name, ++ sizeof(target->t->u.user.name) - 1); + target->t->u.user.revision = target->revision; + + if (target->init != NULL) +@@ -277,8 +278,8 @@ static int parse_ipt(struct action_util *a, int *argc_p, + } + fprintf(stdout, " index %d\n", index); + +- if (strlen(tname) > 16) { +- size = 16; ++ if (strlen(tname) >= 16) { ++ size = 15; + k[15] = 0; + } else { + size = 1 + strlen(tname); +-- +2.20.1 + diff --git a/SOURCES/0110-lib-ll_map-Choose-size-of-new-cache-items-at-run-tim.patch b/SOURCES/0110-lib-ll_map-Choose-size-of-new-cache-items-at-run-tim.patch new file mode 100644 index 0000000..3ea22e5 --- /dev/null +++ b/SOURCES/0110-lib-ll_map-Choose-size-of-new-cache-items-at-run-tim.patch @@ -0,0 +1,48 @@ +From 6ab89ff96d59c90cd6227399a065d52cc38e0ee7 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:08 +0200 +Subject: [PATCH] lib/ll_map: Choose size of new cache items at run-time + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream 
Status: iproute2.git commit 4b9e91782269f + +commit 4b9e91782269fc871d158ed4f11bfcfe4e3b8bf7 +Author: Phil Sutter +Date: Thu Aug 24 11:51:50 2017 +0200 + + lib/ll_map: Choose size of new cache items at run-time + + Instead of having a fixed buffer of 16 bytes for the interface name, + tailor size of new ll_cache entry using the interface name's actual + length. This also makes sure the following call to strcpy() is safe. + + Signed-off-by: Phil Sutter +--- + lib/ll_map.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/lib/ll_map.c b/lib/ll_map.c +index 4e4556c9ac80b..70684b02042b6 100644 +--- a/lib/ll_map.c ++++ b/lib/ll_map.c +@@ -30,7 +30,7 @@ struct ll_cache { + unsigned flags; + unsigned index; + unsigned short type; +- char name[IFNAMSIZ]; ++ char name[]; + }; + + #define IDXMAP_SIZE 1024 +@@ -120,7 +120,7 @@ int ll_remember_index(const struct sockaddr_nl *who, + return 0; + } + +- im = malloc(sizeof(*im)); ++ im = malloc(sizeof(*im) + strlen(ifname) + 1); + if (im == NULL) + return 0; + im->index = ifi->ifi_index; +-- +2.20.1 + diff --git a/SOURCES/0111-ss-Make-struct-tcpstat-fields-timer-and-timeout-unsi.patch b/SOURCES/0111-ss-Make-struct-tcpstat-fields-timer-and-timeout-unsi.patch new file mode 100644 index 0000000..0561037 --- /dev/null +++ b/SOURCES/0111-ss-Make-struct-tcpstat-fields-timer-and-timeout-unsi.patch @@ -0,0 +1,58 @@ +From ca6a2e6f21fc48b494216a095f5bd792a0c6e35d Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:08 +0200 +Subject: [PATCH] ss: Make struct tcpstat fields 'timer' and 'timeout' unsigned + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 4cbf5224f2b50 + +commit 4cbf5224f2b50a24e1873508e7a0f1f81cc81a81 +Author: Phil Sutter +Date: Thu Aug 24 11:41:26 2017 +0200 + + ss: Make struct tcpstat fields 'timer' and 'timeout' unsigned + + Both 'timer' and 'timeout' variables of struct tcpstat are either + scanned as unsigned values from 
/proc/net/tcp{,6} or copied from + 'idiag_timer' and 'idiag_expries' fields of struct inet_diag_msg, which + itself are unsigned. Therefore they may be unsigned as well, which + eliminates the need to check for negative values. + + Signed-off-by: Phil Sutter +--- + misc/ss.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/misc/ss.c b/misc/ss.c +index 7a38e9d830e8d..2a981d8b06918 100644 +--- a/misc/ss.c ++++ b/misc/ss.c +@@ -716,8 +716,8 @@ struct dctcpstat { + + struct tcpstat { + struct sockstat ss; +- int timer; +- int timeout; ++ unsigned int timer; ++ unsigned int timeout; + int probes; + char cong_alg[16]; + double rto, ato, rtt, rttvar; +@@ -903,13 +903,11 @@ static void sock_addr_print(const char *addr, char *delim, const char *port, + sock_addr_print_width(addr_width, addr, delim, serv_width, port, ifname); + } + +-static const char *print_ms_timer(int timeout) ++static const char *print_ms_timer(unsigned int timeout) + { + static char buf[64]; + int secs, msecs, minutes; + +- if (timeout < 0) +- timeout = 0; + secs = timeout/1000; + minutes = secs/60; + secs = secs%60; +-- +2.20.1 + diff --git a/SOURCES/0112-ss-Make-sure-scanned-index-value-to-unix_state_map-i.patch b/SOURCES/0112-ss-Make-sure-scanned-index-value-to-unix_state_map-i.patch new file mode 100644 index 0000000..48630b2 --- /dev/null +++ b/SOURCES/0112-ss-Make-sure-scanned-index-value-to-unix_state_map-i.patch @@ -0,0 +1,36 @@ +From f92edf9b3d088bf8a5619073de43b2f693590be8 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:08 +0200 +Subject: [PATCH] ss: Make sure scanned index value to unix_state_map is sane + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 0aa03350c00d7 + +commit 0aa03350c00d70edbbdab0662a2d8262be2bb68d +Author: Phil Sutter +Date: Thu Aug 24 11:41:27 2017 +0200 + + ss: Make sure scanned index value to unix_state_map is sane + + Signed-off-by: Phil Sutter +--- + 
misc/ss.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/misc/ss.c b/misc/ss.c +index 2a981d8b06918..fdb00a9f3f696 100644 +--- a/misc/ss.c ++++ b/misc/ss.c +@@ -3236,7 +3236,8 @@ static int unix_show(struct filter *f) + + if (flags & (1 << 16)) { + u->state = SS_LISTEN; +- } else { ++ } else if (u->state > 0 && ++ u->state <= ARRAY_SIZE(unix_state_map)) { + u->state = unix_state_map[u->state-1]; + if (u->type == SOCK_DGRAM && u->state == SS_CLOSE && u->rport) + u->state = SS_ESTABLISHED; +-- +2.20.1 + diff --git a/SOURCES/0113-netem-maketable-Check-return-value-of-fscanf.patch b/SOURCES/0113-netem-maketable-Check-return-value-of-fscanf.patch new file mode 100644 index 0000000..bf2b9b5 --- /dev/null +++ b/SOURCES/0113-netem-maketable-Check-return-value-of-fscanf.patch @@ -0,0 +1,37 @@ +From d533a60518e79593c6a1813a6f44aa3889045120 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:08 +0200 +Subject: [PATCH] netem/maketable: Check return value of fscanf() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 92963d136de8c + +commit 92963d136de8c370324716add98888b2ce6e6a94 +Author: Phil Sutter +Date: Thu Aug 24 11:41:28 2017 +0200 + + netem/maketable: Check return value of fscanf() + + Signed-off-by: Phil Sutter +--- + netem/maketable.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/netem/maketable.c b/netem/maketable.c +index ad660e7d457f0..ccb8f0c68b062 100644 +--- a/netem/maketable.c ++++ b/netem/maketable.c +@@ -38,8 +38,8 @@ readdoubles(FILE *fp, int *number) + } + + for (i=0; i +Date: Mon, 29 Apr 2019 20:08:08 +0200 +Subject: [PATCH] lib/bpf: Check return value of write() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit b5c78e1b2c868 + +commit b5c78e1b2c8681e82684f47563acd3d383893658 +Author: Phil Sutter +Date: Thu Aug 24 11:41:29 2017 +0200 + + lib/bpf: Check return value of write() 
+ + This is merely to silence the compiler warning. If write to stderr + failed, assume that printing an error message will fail as well so don't + even try. + + Signed-off-by: Phil Sutter +--- + lib/bpf.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/lib/bpf.c b/lib/bpf.c +index 33c5288e82187..7eb754ad7cb56 100644 +--- a/lib/bpf.c ++++ b/lib/bpf.c +@@ -486,7 +486,8 @@ int bpf_trace_pipe(void) + + ret = read(fd, buff, sizeof(buff) - 1); + if (ret > 0) { +- write(2, buff, ret); ++ if (write(STDERR_FILENO, buff, ret) != ret) ++ return -1; + fflush(stderr); + } + } +-- +2.20.1 + diff --git a/SOURCES/0115-lib-fs-Fix-and-simplify-make_path.patch b/SOURCES/0115-lib-fs-Fix-and-simplify-make_path.patch new file mode 100644 index 0000000..3137352 --- /dev/null +++ b/SOURCES/0115-lib-fs-Fix-and-simplify-make_path.patch @@ -0,0 +1,65 @@ +From 7ab899539b920609712ad24f871b50a19fd8189f Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:08 +0200 +Subject: [PATCH] lib/fs: Fix and simplify make_path() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit ac3415f5c1b1d + +commit ac3415f5c1b1df2d6a4bf770ad52e2e14c09e58e +Author: Phil Sutter +Date: Thu Aug 24 11:41:30 2017 +0200 + + lib/fs: Fix and simplify make_path() + + Calling stat() before mkdir() is racey: The entry might change in + between. Also, the call to stat() seems to exist only to check if the + directory exists already. So simply call mkdir() unconditionally and + catch only errors other than EEXIST. 
+ + Signed-off-by: Phil Sutter +--- + lib/fs.c | 20 +++++--------------- + 1 file changed, 5 insertions(+), 15 deletions(-) + +diff --git a/lib/fs.c b/lib/fs.c +index 1ff881ecfcd8c..ebe05cd44e11b 100644 +--- a/lib/fs.c ++++ b/lib/fs.c +@@ -102,7 +102,6 @@ out: + int make_path(const char *path, mode_t mode) + { + char *dir, *delim; +- struct stat sbuf; + int rc = -1; + + delim = dir = strdup(path); +@@ -120,20 +119,11 @@ int make_path(const char *path, mode_t mode) + if (delim) + *delim = '\0'; + +- if (stat(dir, &sbuf) != 0) { +- if (errno != ENOENT) { +- fprintf(stderr, +- "stat failed for %s: %s\n", +- dir, strerror(errno)); +- goto out; +- } +- +- if (mkdir(dir, mode) != 0) { +- fprintf(stderr, +- "mkdir failed for %s: %s\n", +- dir, strerror(errno)); +- goto out; +- } ++ rc = mkdir(dir, mode); ++ if (rc && errno != EEXIST) { ++ fprintf(stderr, "mkdir failed for %s: %s\n", ++ dir, strerror(errno)); ++ goto out; + } + + if (delim == NULL) +-- +2.20.1 + diff --git a/SOURCES/0116-lib-libnetlink-Don-t-pass-NULL-parameter-to-memcpy.patch b/SOURCES/0116-lib-libnetlink-Don-t-pass-NULL-parameter-to-memcpy.patch new file mode 100644 index 0000000..dcbfc2f --- /dev/null +++ b/SOURCES/0116-lib-libnetlink-Don-t-pass-NULL-parameter-to-memcpy.patch @@ -0,0 +1,49 @@ +From 8af39fdff4f966d00571bda2610eac8fae2f7482 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:08 +0200 +Subject: [PATCH] lib/libnetlink: Don't pass NULL parameter to memcpy() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 893deac4c43b5 + +commit 893deac4c43b57ae49f736ec050724b6de181062 +Author: Phil Sutter +Date: Thu Aug 24 11:41:31 2017 +0200 + + lib/libnetlink: Don't pass NULL parameter to memcpy() + + Both addattr_l() and rta_addattr_l() may be called with NULL data + pointer and 0 alen parameters. Avoid calling memcpy() in that case. 
+ + Signed-off-by: Phil Sutter +--- + lib/libnetlink.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/lib/libnetlink.c b/lib/libnetlink.c +index 75e20abf0b97f..ff26ddf50552b 100644 +--- a/lib/libnetlink.c ++++ b/lib/libnetlink.c +@@ -898,7 +898,8 @@ int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, + rta = NLMSG_TAIL(n); + rta->rta_type = type; + rta->rta_len = len; +- memcpy(RTA_DATA(rta), data, alen); ++ if (alen) ++ memcpy(RTA_DATA(rta), data, alen); + n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); + return 0; + } +@@ -985,7 +986,8 @@ int rta_addattr_l(struct rtattr *rta, int maxlen, int type, + subrta = (struct rtattr *)(((char *)rta) + RTA_ALIGN(rta->rta_len)); + subrta->rta_type = type; + subrta->rta_len = len; +- memcpy(RTA_DATA(subrta), data, alen); ++ if (alen) ++ memcpy(RTA_DATA(subrta), data, alen); + rta->rta_len = NLMSG_ALIGN(rta->rta_len) + RTA_ALIGN(len); + return 0; + } +-- +2.20.1 + diff --git a/SOURCES/0117-utils-Implement-strlcpy-and-strlcat.patch b/SOURCES/0117-utils-Implement-strlcpy-and-strlcat.patch new file mode 100644 index 0000000..660b433 --- /dev/null +++ b/SOURCES/0117-utils-Implement-strlcpy-and-strlcat.patch @@ -0,0 +1,65 @@ +From 3bcdea42e7402e79a914fe3cbefdcc1caa89464c Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:08 +0200 +Subject: [PATCH] utils: Implement strlcpy() and strlcat() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 8d15e012a3227 + +commit 8d15e012a3227d79295cd95582bb6d8a6f0bdc92 +Author: Phil Sutter +Date: Fri Sep 1 18:52:51 2017 +0200 + + utils: Implement strlcpy() and strlcat() + + By making use of strncpy(), both implementations are really simple so + there is no need to add libbsd as additional dependency. 
+ + Signed-off-by: Phil Sutter +--- + include/utils.h | 3 +++ + lib/utils.c | 19 +++++++++++++++++++ + 2 files changed, 22 insertions(+) + +diff --git a/include/utils.h b/include/utils.h +index d707a9dacdb85..d596a6fc10574 100644 +--- a/include/utils.h ++++ b/include/utils.h +@@ -264,4 +264,7 @@ int make_path(const char *path, mode_t mode); + char *find_cgroup2_mount(void); + int get_command_name(const char *pid, char *comm, size_t len); + ++size_t strlcpy(char *dst, const char *src, size_t size); ++size_t strlcat(char *dst, const char *src, size_t size); ++ + #endif /* __UTILS_H__ */ +diff --git a/lib/utils.c b/lib/utils.c +index fc9c575ba0c7d..c9ba2f332c2a7 100644 +--- a/lib/utils.c ++++ b/lib/utils.c +@@ -1228,3 +1228,22 @@ int get_real_family(int rtm_type, int rtm_family) + + return rtm_family; + } ++ ++size_t strlcpy(char *dst, const char *src, size_t size) ++{ ++ if (size) { ++ strncpy(dst, src, size - 1); ++ dst[size - 1] = '\0'; ++ } ++ return strlen(src); ++} ++ ++size_t strlcat(char *dst, const char *src, size_t size) ++{ ++ size_t dlen = strlen(dst); ++ ++ if (dlen > size) ++ return dlen + strlen(src); ++ ++ return dlen + strlcpy(dst + dlen, src, size - dlen); ++} +-- +2.20.1 + diff --git a/SOURCES/0118-Convert-the-obvious-cases-to-strlcpy.patch b/SOURCES/0118-Convert-the-obvious-cases-to-strlcpy.patch new file mode 100644 index 0000000..200a2d8 --- /dev/null +++ b/SOURCES/0118-Convert-the-obvious-cases-to-strlcpy.patch @@ -0,0 +1,135 @@ +From e557cf7984d2f06aff158a9089e714e5f445d3ac Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:08:08 +0200 +Subject: [PATCH] Convert the obvious cases to strlcpy() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 18f156bfecda2 +Conflicts: +* on iproute_lwtunnel.c, due to missing commit + e8493916a8ede ("iproute: add support for SR-IPv6 lwtunnel encapsulation") +* on lib/bpf.c, due to missing commit + 95ae9a4870e7d ("bpf: fix mnt path when from 
env") + fix bpf_find_mntpt() in this case, instead. + +commit 18f156bfecda20166c2fb543ba8c9c6559edef9c +Author: Phil Sutter +Date: Fri Sep 1 18:52:52 2017 +0200 + + Convert the obvious cases to strlcpy() + + This converts the typical idiom of manually terminating the buffer after + a call to strncpy(). + + Signed-off-by: Phil Sutter +--- + ip/ipnetns.c | 3 +-- + ip/ipvrf.c | 3 +-- + lib/bpf.c | 3 +-- + lib/fs.c | 3 +-- + lib/inet_proto.c | 3 +-- + misc/ss.c | 3 +-- + tc/em_ipset.c | 3 +-- + 7 files changed, 7 insertions(+), 14 deletions(-) + +diff --git a/ip/ipnetns.c b/ip/ipnetns.c +index 1c0ade90dee5e..427b59c57381d 100644 +--- a/ip/ipnetns.c ++++ b/ip/ipnetns.c +@@ -523,8 +523,7 @@ int netns_identify_pid(const char *pidstr, char *name, int len) + + if ((st.st_dev == netst.st_dev) && + (st.st_ino == netst.st_ino)) { +- strncpy(name, entry->d_name, len - 1); +- name[len - 1] = '\0'; ++ strlcpy(name, entry->d_name, len); + } + } + closedir(dir); +diff --git a/ip/ipvrf.c b/ip/ipvrf.c +index ae3b48fa81996..f58c8df728265 100644 +--- a/ip/ipvrf.c ++++ b/ip/ipvrf.c +@@ -333,8 +333,7 @@ static int vrf_path(char *vpath, size_t len) + if (vrf) + *vrf = '\0'; + +- strncpy(vpath, start, len - 1); +- vpath[len - 1] = '\0'; ++ strlcpy(vpath, start, len); + + /* if vrf path is just / then return nothing */ + if (!strcmp(vpath, "/")) +diff --git a/lib/bpf.c b/lib/bpf.c +index 7eb754ad7cb56..e072cba214067 100644 +--- a/lib/bpf.c ++++ b/lib/bpf.c +@@ -424,8 +424,7 @@ static const char *bpf_find_mntpt(const char *fstype, unsigned long magic, + ptr = known_mnts; + while (*ptr) { + if (bpf_valid_mntpt(*ptr, magic) == 0) { +- strncpy(mnt, *ptr, len - 1); +- mnt[len - 1] = 0; ++ strlcpy(mnt, *ptr, len); + return mnt; + } + ptr++; +diff --git a/lib/fs.c b/lib/fs.c +index ebe05cd44e11b..86efd4ed2ed80 100644 +--- a/lib/fs.c ++++ b/lib/fs.c +@@ -172,8 +172,7 @@ int get_command_name(const char *pid, char *comm, size_t len) + if (nl) + *nl = '\0'; + +- strncpy(comm, name, len - 1); +- 
comm[len - 1] = '\0'; ++ strlcpy(comm, name, len); + break; + } + +diff --git a/lib/inet_proto.c b/lib/inet_proto.c +index 53c029039b6d5..bdfd52fdafe5a 100644 +--- a/lib/inet_proto.c ++++ b/lib/inet_proto.c +@@ -38,8 +38,7 @@ const char *inet_proto_n2a(int proto, char *buf, int len) + free(ncache); + icache = proto; + ncache = strdup(pe->p_name); +- strncpy(buf, pe->p_name, len - 1); +- buf[len - 1] = '\0'; ++ strlcpy(buf, pe->p_name, len); + return buf; + } + snprintf(buf, len, "ipproto-%d", proto); +diff --git a/misc/ss.c b/misc/ss.c +index fdb00a9f3f696..6aaae1b5390e4 100644 +--- a/misc/ss.c ++++ b/misc/ss.c +@@ -444,8 +444,7 @@ static void user_ent_hash_build(void) + + user_ent_hash_build_init = 1; + +- strncpy(name, root, sizeof(name)-1); +- name[sizeof(name)-1] = 0; ++ strlcpy(name, root, sizeof(name)); + + if (strlen(name) == 0 || name[strlen(name)-1] != '/') + strcat(name, "/"); +diff --git a/tc/em_ipset.c b/tc/em_ipset.c +index b59756515d239..48b287f5ba3b2 100644 +--- a/tc/em_ipset.c ++++ b/tc/em_ipset.c +@@ -145,8 +145,7 @@ get_set_byname(const char *setname, struct xt_set_info *info) + int res; + + req.op = IP_SET_OP_GET_BYNAME; +- strncpy(req.set.name, setname, IPSET_MAXNAMELEN); +- req.set.name[IPSET_MAXNAMELEN - 1] = '\0'; ++ strlcpy(req.set.name, setname, IPSET_MAXNAMELEN); + res = do_getsockopt(&req); + if (res != 0) + return -1; +-- +2.20.1 + diff --git a/SOURCES/0119-Convert-harmful-calls-to-strncpy-to-strlcpy.patch b/SOURCES/0119-Convert-harmful-calls-to-strncpy-to-strlcpy.patch new file mode 100644 index 0000000..ee6b156 --- /dev/null +++ b/SOURCES/0119-Convert-harmful-calls-to-strncpy-to-strlcpy.patch @@ -0,0 +1,66 @@ +From 9556150792daf8f2fbea934bcb77b4b74a21b2e1 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:09:12 +0200 +Subject: [PATCH] Convert harmful calls to strncpy() to strlcpy() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 532b8874fe545 + +commit 
532b8874fe545acaa8d45c4dd3b54b8f3bb41d9f +Author: Phil Sutter +Date: Fri Sep 1 18:52:53 2017 +0200 + + Convert harmful calls to strncpy() to strlcpy() + + This patch converts spots where manual buffer termination was missing to + strlcpy() since that does what is needed. + + Signed-off-by: Phil Sutter +--- + genl/ctrl.c | 2 +- + ip/ipvrf.c | 2 +- + ip/xfrm_state.c | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/genl/ctrl.c b/genl/ctrl.c +index 21e857cfcfc25..a6d31b04e5679 100644 +--- a/genl/ctrl.c ++++ b/genl/ctrl.c +@@ -317,7 +317,7 @@ static int ctrl_list(int cmd, int argc, char **argv) + + if (matches(*argv, "name") == 0) { + NEXT_ARG(); +- strncpy(d, *argv, sizeof (d) - 1); ++ strlcpy(d, *argv, sizeof(d)); + addattr_l(nlh, 128, CTRL_ATTR_FAMILY_NAME, + d, strlen(d) + 1); + } else if (matches(*argv, "id") == 0) { +diff --git a/ip/ipvrf.c b/ip/ipvrf.c +index f58c8df728265..406cddbcd44ca 100644 +--- a/ip/ipvrf.c ++++ b/ip/ipvrf.c +@@ -71,7 +71,7 @@ static int vrf_identify(pid_t pid, char *name, size_t len) + if (end) + *end = '\0'; + +- strncpy(name, vrf, len - 1); ++ strlcpy(name, vrf, len); + break; + } + } +diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c +index 04ed3492ad3b5..2222737cdd98d 100644 +--- a/ip/xfrm_state.c ++++ b/ip/xfrm_state.c +@@ -123,7 +123,7 @@ static int xfrm_algo_parse(struct xfrm_algo *alg, enum xfrm_attr_type_t type, + fprintf(stderr, "warning: ALGO-NAME/ALGO-KEYMAT values will be sent to the kernel promiscuously! 
(verifying them isn't implemented yet)\n"); + #endif + +- strncpy(alg->alg_name, name, sizeof(alg->alg_name)); ++ strlcpy(alg->alg_name, name, sizeof(alg->alg_name)); + + if (slen > 2 && strncmp(key, "0x", 2) == 0) { + /* split two chars "0x" from the top */ +-- +2.20.1 + diff --git a/SOURCES/0120-ipxfrm-Replace-STRBUF_CAT-macro-with-strlcat.patch b/SOURCES/0120-ipxfrm-Replace-STRBUF_CAT-macro-with-strlcat.patch new file mode 100644 index 0000000..ea251a2 --- /dev/null +++ b/SOURCES/0120-ipxfrm-Replace-STRBUF_CAT-macro-with-strlcat.patch @@ -0,0 +1,75 @@ +From 65d69021e5b8998cec1e7a13b8b297bfc606f9fd Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:09:12 +0200 +Subject: [PATCH] ipxfrm: Replace STRBUF_CAT macro with strlcat() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 44cc6c792a650 + +commit 44cc6c792a6503e024f042c65f35cd44b3283b20 +Author: Phil Sutter +Date: Fri Sep 1 18:52:54 2017 +0200 + + ipxfrm: Replace STRBUF_CAT macro with strlcat() + + Signed-off-by: Phil Sutter +--- + ip/ipxfrm.c | 21 +++++---------------- + 1 file changed, 5 insertions(+), 16 deletions(-) + +diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c +index b0cfac178f8bc..df72a0c0bf88e 100644 +--- a/ip/ipxfrm.c ++++ b/ip/ipxfrm.c +@@ -40,17 +40,6 @@ + #include "ip_common.h" + + #define STRBUF_SIZE (128) +-#define STRBUF_CAT(buf, str) \ +- do { \ +- int rest = sizeof(buf) - 1 - strlen(buf); \ +- if (rest > 0) { \ +- int len = strlen(str); \ +- if (len > rest) \ +- len = rest; \ +- strncat(buf, str, len); \ +- buf[sizeof(buf) - 1] = '\0'; \ +- } \ +- } while (0); + + struct xfrm_filter filter; + +@@ -883,8 +872,8 @@ void xfrm_state_info_print(struct xfrm_usersa_info *xsinfo, + prefix, title); + + if (prefix) +- STRBUF_CAT(buf, prefix); +- STRBUF_CAT(buf, "\t"); ++ strlcat(buf, prefix, sizeof(buf)); ++ strlcat(buf, "\t", sizeof(buf)); + + fputs(buf, fp); + fprintf(fp, "replay-window %u ", xsinfo->replay_window); +@@ -925,7 +914,7 
@@ void xfrm_state_info_print(struct xfrm_usersa_info *xsinfo, + char sbuf[STRBUF_SIZE]; + + memcpy(sbuf, buf, sizeof(sbuf)); +- STRBUF_CAT(sbuf, "sel "); ++ strlcat(sbuf, "sel ", sizeof(sbuf)); + + xfrm_selector_print(&xsinfo->sel, xsinfo->family, fp, sbuf); + } +@@ -973,8 +962,8 @@ void xfrm_policy_info_print(struct xfrm_userpolicy_info *xpinfo, + } + + if (prefix) +- STRBUF_CAT(buf, prefix); +- STRBUF_CAT(buf, "\t"); ++ strlcat(buf, prefix, sizeof(buf)); ++ strlcat(buf, "\t", sizeof(buf)); + + fputs(buf, fp); + if (xpinfo->dir >= XFRM_POLICY_MAX) { +-- +2.20.1 + diff --git a/SOURCES/0121-tc_util-No-need-to-terminate-an-snprintf-ed-buffer.patch b/SOURCES/0121-tc_util-No-need-to-terminate-an-snprintf-ed-buffer.patch new file mode 100644 index 0000000..da2efeb --- /dev/null +++ b/SOURCES/0121-tc_util-No-need-to-terminate-an-snprintf-ed-buffer.patch @@ -0,0 +1,37 @@ +From 42b9cc605f54f2a3ad75a29b5f2fc308bfe5fc61 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:09:12 +0200 +Subject: [PATCH] tc_util: No need to terminate an snprintf'ed buffer + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 9376314b49a47 + +commit 9376314b49a47eb42ade3fc0d41cb51438f8dbc6 +Author: Phil Sutter +Date: Fri Sep 1 18:52:55 2017 +0200 + + tc_util: No need to terminate an snprintf'ed buffer + + snprintf() won't leave the buffer unterminated, so manually terminating + is not necessary here. 
+ + Signed-off-by: Phil Sutter +--- + tc/tc_util.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/tc/tc_util.c b/tc/tc_util.c +index 24ca1f1c1c040..296825ae174e0 100644 +--- a/tc/tc_util.c ++++ b/tc/tc_util.c +@@ -430,7 +430,6 @@ const char *action_n2a(int action) + return "stolen"; + default: + snprintf(buf, 64, "%d", action); +- buf[63] = '\0'; + return buf; + } + } +-- +2.20.1 + diff --git a/SOURCES/0122-lnstat_util-Make-sure-buffer-is-NUL-terminated.patch b/SOURCES/0122-lnstat_util-Make-sure-buffer-is-NUL-terminated.patch new file mode 100644 index 0000000..20ccdc3 --- /dev/null +++ b/SOURCES/0122-lnstat_util-Make-sure-buffer-is-NUL-terminated.patch @@ -0,0 +1,40 @@ +From ed508c9ee5991655039d2b080191b1c70680b5c8 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:09:12 +0200 +Subject: [PATCH] lnstat_util: Make sure buffer is NUL-terminated + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit bc4a57b87990b + +commit bc4a57b87990b30c85fdf0efbc1f8f219466daf4 +Author: Phil Sutter +Date: Fri Sep 1 18:52:56 2017 +0200 + + lnstat_util: Make sure buffer is NUL-terminated + + Can't use strlcpy() here since lnstat is not linked against libutil. + + While being at it, fix coding style in that chunk as well. 
+ + Signed-off-by: Phil Sutter +--- + misc/lnstat_util.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/misc/lnstat_util.c b/misc/lnstat_util.c +index ec19238c24b94..c2dc42ec1ff12 100644 +--- a/misc/lnstat_util.c ++++ b/misc/lnstat_util.c +@@ -150,7 +150,8 @@ static int lnstat_scan_compat_rtstat_fields(struct lnstat_file *lf) + { + char buf[FGETS_BUF_SIZE]; + +- strncpy(buf, RTSTAT_COMPAT_LINE, sizeof(buf)-1); ++ strncpy(buf, RTSTAT_COMPAT_LINE, sizeof(buf) - 1); ++ buf[sizeof(buf) - 1] = '\0'; + + return __lnstat_scan_fields(lf, buf); + } +-- +2.20.1 + diff --git a/SOURCES/0123-utils-strlcpy-and-strlcat-don-t-clobber-dst.patch b/SOURCES/0123-utils-strlcpy-and-strlcat-don-t-clobber-dst.patch new file mode 100644 index 0000000..230f3a6 --- /dev/null +++ b/SOURCES/0123-utils-strlcpy-and-strlcat-don-t-clobber-dst.patch @@ -0,0 +1,62 @@ +From 866b995355894ab8f20d22a554d47322dcf1029a Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:09:13 +0200 +Subject: [PATCH] utils: strlcpy() and strlcat() don't clobber dst + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 50ea3c64384b1 + +commit 50ea3c64384b1d1bfa9c96de86c21ac8e9fef183 +Author: Phil Sutter +Date: Wed Sep 6 18:51:42 2017 +0200 + + utils: strlcpy() and strlcat() don't clobber dst + + As David Laight correctly pointed out, the first version of strlcpy() + modified dst buffer behind the string copied into it. Fix this by + writing NUL to the byte immediately following src string instead of to + the last byte in dst. Doing so also allows to reduce overhead by using + memcpy(). + + Improve strlcat() by avoiding the call to strlcpy() if dst string is + already full, not just as sanity check. 
+ + Signed-off-by: Phil Sutter +--- + lib/utils.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/lib/utils.c b/lib/utils.c +index c9ba2f332c2a7..228d97bfe5e9b 100644 +--- a/lib/utils.c ++++ b/lib/utils.c +@@ -1231,18 +1231,22 @@ int get_real_family(int rtm_type, int rtm_family) + + size_t strlcpy(char *dst, const char *src, size_t size) + { ++ size_t srclen = strlen(src); ++ + if (size) { +- strncpy(dst, src, size - 1); +- dst[size - 1] = '\0'; ++ size_t minlen = min(srclen, size - 1); ++ ++ memcpy(dst, src, minlen); ++ dst[minlen] = '\0'; + } +- return strlen(src); ++ return srclen; + } + + size_t strlcat(char *dst, const char *src, size_t size) + { + size_t dlen = strlen(dst); + +- if (dlen > size) ++ if (dlen >= size) + return dlen + strlen(src); + + return dlen + strlcpy(dst + dlen, src, size - dlen); +-- +2.20.1 + diff --git a/SOURCES/0124-ip-6-tunnel-Avoid-copying-user-supplied-interface-na.patch b/SOURCES/0124-ip-6-tunnel-Avoid-copying-user-supplied-interface-na.patch new file mode 100644 index 0000000..8881924 --- /dev/null +++ b/SOURCES/0124-ip-6-tunnel-Avoid-copying-user-supplied-interface-na.patch @@ -0,0 +1,152 @@ +From 74331750f118690ca3c375e52b10272b992320e7 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:09:13 +0200 +Subject: [PATCH] ip{6, }tunnel: Avoid copying user-supplied interface name + around + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 26111ab1dba82 + +commit 26111ab1dba820421ccaf283ac097a79b95023a2 +Author: Phil Sutter +Date: Mon Oct 2 13:46:35 2017 +0200 + + ip{6, }tunnel: Avoid copying user-supplied interface name around + + In both files' parse_args() functions as well as in iptunnel's do_prl() + and do_6rd() functions, a user-supplied 'dev' parameter is uselessly + copied into a temporary buffer before passing it to ll_name_to_index() + or copying into a struct ifreq. 
Avoid this by just caching the argv + pointer value until the later lookup/strcpy. + + Signed-off-by: Phil Sutter +--- + ip/ip6tunnel.c | 6 +++--- + ip/iptunnel.c | 22 +++++++++------------- + 2 files changed, 12 insertions(+), 16 deletions(-) + +diff --git a/ip/ip6tunnel.c b/ip/ip6tunnel.c +index b4a7def144226..c12d700e74189 100644 +--- a/ip/ip6tunnel.c ++++ b/ip/ip6tunnel.c +@@ -136,7 +136,7 @@ static void print_tunnel(struct ip6_tnl_parm2 *p) + static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p) + { + int count = 0; +- char medium[IFNAMSIZ] = {}; ++ const char *medium = NULL; + + while (argc > 0) { + if (strcmp(*argv, "mode") == 0) { +@@ -180,7 +180,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p) + memcpy(&p->laddr, &laddr.data, sizeof(p->laddr)); + } else if (strcmp(*argv, "dev") == 0) { + NEXT_ARG(); +- strncpy(medium, *argv, IFNAMSIZ - 1); ++ medium = *argv; + } else if (strcmp(*argv, "encaplimit") == 0) { + NEXT_ARG(); + if (strcmp(*argv, "none") == 0) { +@@ -285,7 +285,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p) + count++; + argc--; argv++; + } +- if (medium[0]) { ++ if (medium) { + p->link = ll_name_to_index(medium); + if (p->link == 0) { + fprintf(stderr, "Cannot find device \"%s\"\n", medium); +diff --git a/ip/iptunnel.c b/ip/iptunnel.c +index 105d0f5576f1a..0acfd0793d3cd 100644 +--- a/ip/iptunnel.c ++++ b/ip/iptunnel.c +@@ -60,7 +60,7 @@ static void set_tunnel_proto(struct ip_tunnel_parm *p, int proto) + static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p) + { + int count = 0; +- char medium[IFNAMSIZ] = {}; ++ const char *medium = NULL; + int isatap = 0; + + memset(p, 0, sizeof(*p)); +@@ -139,7 +139,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p) + p->iph.saddr = htonl(INADDR_ANY); + } else if (strcmp(*argv, "dev") == 0) { + NEXT_ARG(); +- strncpy(medium, *argv, IFNAMSIZ - 1); ++ medium = 
*argv; + } else if (strcmp(*argv, "ttl") == 0 || + strcmp(*argv, "hoplimit") == 0 || + strcmp(*argv, "hlim") == 0) { +@@ -216,7 +216,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p) + } + } + +- if (medium[0]) { ++ if (medium) { + p->link = ll_name_to_index(medium); + if (p->link == 0) { + fprintf(stderr, "Cannot find device \"%s\"\n", medium); +@@ -465,9 +465,8 @@ static int do_prl(int argc, char **argv) + { + struct ip_tunnel_prl p = {}; + int count = 0; +- int devname = 0; + int cmd = 0; +- char medium[IFNAMSIZ] = {}; ++ const char *medium = NULL; + + while (argc > 0) { + if (strcmp(*argv, "prl-default") == 0) { +@@ -488,8 +487,7 @@ static int do_prl(int argc, char **argv) + count++; + } else if (strcmp(*argv, "dev") == 0) { + NEXT_ARG(); +- strncpy(medium, *argv, IFNAMSIZ-1); +- devname++; ++ medium = *argv; + } else { + fprintf(stderr, + "Invalid PRL parameter \"%s\"\n", *argv); +@@ -502,7 +500,7 @@ static int do_prl(int argc, char **argv) + } + argc--; argv++; + } +- if (devname == 0) { ++ if (!medium) { + fprintf(stderr, "Must specify device\n"); + exit(-1); + } +@@ -513,9 +511,8 @@ static int do_prl(int argc, char **argv) + static int do_6rd(int argc, char **argv) + { + struct ip_tunnel_6rd ip6rd = {}; +- int devname = 0; + int cmd = 0; +- char medium[IFNAMSIZ] = {}; ++ const char *medium = NULL; + inet_prefix prefix; + + while (argc > 0) { +@@ -537,8 +534,7 @@ static int do_6rd(int argc, char **argv) + cmd = SIOCDEL6RD; + } else if (strcmp(*argv, "dev") == 0) { + NEXT_ARG(); +- strncpy(medium, *argv, IFNAMSIZ-1); +- devname++; ++ medium = *argv; + } else { + fprintf(stderr, + "Invalid 6RD parameter \"%s\"\n", *argv); +@@ -546,7 +542,7 @@ static int do_6rd(int argc, char **argv) + } + argc--; argv++; + } +- if (devname == 0) { ++ if (!medium) { + fprintf(stderr, "Must specify device\n"); + exit(-1); + } +-- +2.20.1 + diff --git a/SOURCES/0125-tc-flower-No-need-to-cache-indev-arg.patch 
b/SOURCES/0125-tc-flower-No-need-to-cache-indev-arg.patch new file mode 100644 index 0000000..d6312ea --- /dev/null +++ b/SOURCES/0125-tc-flower-No-need-to-cache-indev-arg.patch @@ -0,0 +1,42 @@ +From 85bcdf3ca3a76ce3b4f62769aa64adcb1c849082 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:09:13 +0200 +Subject: [PATCH] tc: flower: No need to cache indev arg + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit ee474849c8511 + +commit ee474849c85116ec36e387882447f737ac3fdefb +Author: Phil Sutter +Date: Mon Oct 2 13:46:36 2017 +0200 + + tc: flower: No need to cache indev arg + + Since addattrstrz() will copy the provided string into the attribute + payload, there is no need to cache the data. + + Signed-off-by: Phil Sutter +--- + tc/f_flower.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/tc/f_flower.c b/tc/f_flower.c +index e2c7daa0b8e03..34249254603ff 100644 +--- a/tc/f_flower.c ++++ b/tc/f_flower.c +@@ -642,11 +642,8 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, + } else if (matches(*argv, "skip_sw") == 0) { + flags |= TCA_CLS_FLAGS_SKIP_SW; + } else if (matches(*argv, "indev") == 0) { +- char ifname[IFNAMSIZ] = {}; +- + NEXT_ARG(); +- strncpy(ifname, *argv, sizeof(ifname) - 1); +- addattrstrz(n, MAX_MSG, TCA_FLOWER_INDEV, ifname); ++ addattrstrz(n, MAX_MSG, TCA_FLOWER_INDEV, *argv); + } else if (matches(*argv, "vlan_id") == 0) { + __u16 vid; + +-- +2.20.1 + diff --git a/SOURCES/0126-Check-user-supplied-interface-name-lengths.patch b/SOURCES/0126-Check-user-supplied-interface-name-lengths.patch new file mode 100644 index 0000000..ada9ba1 --- /dev/null +++ b/SOURCES/0126-Check-user-supplied-interface-name-lengths.patch @@ -0,0 +1,378 @@ +From 358ca205cfc9646aefae6572607a0a1363086e51 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 29 Apr 2019 20:09:13 +0200 +Subject: [PATCH] Check user supplied interface name lengths + +Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 625df645b703d + +commit 625df645b703dc858d54784c35beff64464afae2 +Author: Phil Sutter +Date: Mon Oct 2 13:46:37 2017 +0200 + + Check user supplied interface name lengths + + The original problem was that something like: + + | strncpy(ifr.ifr_name, *argv, IFNAMSIZ); + + might leave ifr.ifr_name unterminated if length of *argv exceeds + IFNAMSIZ. In order to fix this, I thought about replacing all those + cases with (equivalent) calls to snprintf() or even introducing + strlcpy(). But as Ulrich Drepper correctly pointed out when rejecting + the latter from being added to glibc, truncating a string without + notifying the user is not to be considered good practice. So let's + exercise what he suggested and reject empty, overlong or otherwise + invalid interface names right from the start - this way calls to + strncpy() like shown above become safe and the user has a chance to + reconsider what he was trying to do. + + Note that this doesn't add calls to check_ifname() to all places where + user supplied interface name is parsed. In many cases, the interface + must exist already and is therefore looked up using ll_name_to_index(), + so if_nametoindex() will perform the necessary checks already. 
+ + Signed-off-by: Phil Sutter +--- + include/utils.h | 2 ++ + ip/ip6tunnel.c | 3 ++- + ip/ipl2tp.c | 4 +++- + ip/iplink.c | 31 ++++++++++++------------------- + ip/ipmaddr.c | 3 ++- + ip/iprule.c | 10 ++++++++-- + ip/iptunnel.c | 7 ++++++- + ip/iptuntap.c | 6 ++++-- + lib/utils.c | 29 +++++++++++++++++++++++++++++ + misc/arpd.c | 3 ++- + tc/f_flower.c | 2 ++ + 11 files changed, 72 insertions(+), 28 deletions(-) + +diff --git a/include/utils.h b/include/utils.h +index d596a6fc10574..0382460136180 100644 +--- a/include/utils.h ++++ b/include/utils.h +@@ -145,6 +145,8 @@ void missarg(const char *) __attribute__((noreturn)); + void invarg(const char *, const char *) __attribute__((noreturn)); + void duparg(const char *, const char *) __attribute__((noreturn)); + void duparg2(const char *, const char *) __attribute__((noreturn)); ++int check_ifname(const char *); ++int get_ifname(char *, const char *); + int matches(const char *arg, const char *pattern); + int inet_addr_match(const inet_prefix *a, const inet_prefix *b, int bits); + +diff --git a/ip/ip6tunnel.c b/ip/ip6tunnel.c +index c12d700e74189..bc44bef7f030c 100644 +--- a/ip/ip6tunnel.c ++++ b/ip/ip6tunnel.c +@@ -273,7 +273,8 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p) + usage(); + if (p->name[0]) + duparg2("name", *argv); +- strncpy(p->name, *argv, IFNAMSIZ - 1); ++ if (get_ifname(p->name, *argv)) ++ invarg("\"name\" not a valid ifname", *argv); + if (cmd == SIOCCHGTUNNEL && count == 0) { + struct ip6_tnl_parm2 old_p = {}; + +diff --git a/ip/ipl2tp.c b/ip/ipl2tp.c +index 742adbe4f9c3a..7c5ed313b186f 100644 +--- a/ip/ipl2tp.c ++++ b/ip/ipl2tp.c +@@ -182,7 +182,7 @@ static int create_session(struct l2tp_parm *p) + if (p->peer_cookie_len) + addattr_l(&req.n, 1024, L2TP_ATTR_PEER_COOKIE, + p->peer_cookie, p->peer_cookie_len); +- if (p->ifname && p->ifname[0]) ++ if (p->ifname) + addattrstrz(&req.n, 1024, L2TP_ATTR_IFNAME, p->ifname); + + if (rtnl_talk(&genl_rth, &req.n, NULL) < 0) 
+@@ -545,6 +545,8 @@ static int parse_args(int argc, char **argv, int cmd, struct l2tp_parm *p) + } + } else if (strcmp(*argv, "name") == 0) { + NEXT_ARG(); ++ if (check_ifname(*argv)) ++ invarg("\"name\" not a valid ifname", *argv); + p->ifname = *argv; + } else if (strcmp(*argv, "remote") == 0) { + NEXT_ARG(); +diff --git a/ip/iplink.c b/ip/iplink.c +index db5b2c9645ba8..50f1075d94171 100644 +--- a/ip/iplink.c ++++ b/ip/iplink.c +@@ -581,6 +581,8 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, + req->i.ifi_flags &= ~IFF_UP; + } else if (strcmp(*argv, "name") == 0) { + NEXT_ARG(); ++ if (check_ifname(*argv)) ++ invarg("\"name\" not a valid ifname", *argv); + *name = *argv; + } else if (strcmp(*argv, "index") == 0) { + NEXT_ARG(); +@@ -848,6 +850,8 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, + NEXT_ARG(); + if (*dev) + duparg2("dev", *argv); ++ if (check_ifname(*argv)) ++ invarg("\"dev\" not a valid ifname", *argv); + *dev = *argv; + dev_index = ll_name_to_index(*dev); + } +@@ -870,7 +874,6 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, + + static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv) + { +- int len; + char *dev = NULL; + char *name = NULL; + char *link = NULL; +@@ -960,13 +963,8 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv) + } + + if (name) { +- len = strlen(name) + 1; +- if (len == 1) +- invarg("\"\" is not a valid device identifier\n", +- "name"); +- if (len > IFNAMSIZ) +- invarg("\"name\" too long\n", name); +- addattr_l(&req.n, sizeof(req), IFLA_IFNAME, name, len); ++ addattr_l(&req.n, sizeof(req), ++ IFLA_IFNAME, name, strlen(name) + 1); + } + + if (type) { +@@ -1016,7 +1014,6 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv) + + int iplink_get(unsigned int flags, char *name, __u32 filt_mask) + { +- int len; + struct iplink_req req = { + .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + 
.n.nlmsg_flags = NLM_F_REQUEST | flags, +@@ -1026,13 +1023,8 @@ int iplink_get(unsigned int flags, char *name, __u32 filt_mask) + struct nlmsghdr *answer; + + if (name) { +- len = strlen(name) + 1; +- if (len == 1) +- invarg("\"\" is not a valid device identifier\n", +- "name"); +- if (len > IFNAMSIZ) +- invarg("\"name\" too long\n", name); +- addattr_l(&req.n, sizeof(req), IFLA_IFNAME, name, len); ++ addattr_l(&req.n, sizeof(req), ++ IFLA_IFNAME, name, strlen(name) + 1); + } + addattr32(&req.n, sizeof(req), IFLA_EXT_MASK, filt_mask); + +@@ -1256,6 +1248,8 @@ static int do_set(int argc, char **argv) + flags &= ~IFF_UP; + } else if (strcmp(*argv, "name") == 0) { + NEXT_ARG(); ++ if (check_ifname(*argv)) ++ invarg("\"name\" not a valid ifname", *argv); + newname = *argv; + } else if (matches(*argv, "address") == 0) { + NEXT_ARG(); +@@ -1346,6 +1340,8 @@ static int do_set(int argc, char **argv) + + if (dev) + duparg2("dev", *argv); ++ if (check_ifname(*argv)) ++ invarg("\"dev\" not a valid ifname", *argv); + dev = *argv; + } + argc--; argv++; +@@ -1374,9 +1370,6 @@ static int do_set(int argc, char **argv) + } + + if (newname && strcmp(dev, newname)) { +- if (strlen(newname) == 0) +- invarg("\"\" is not a valid device identifier\n", +- "name"); + if (do_changename(dev, newname) < 0) + return -1; + dev = newname; +diff --git a/ip/ipmaddr.c b/ip/ipmaddr.c +index 85a69e779563d..5683f6fa830c1 100644 +--- a/ip/ipmaddr.c ++++ b/ip/ipmaddr.c +@@ -284,7 +284,8 @@ static int multiaddr_modify(int cmd, int argc, char **argv) + NEXT_ARG(); + if (ifr.ifr_name[0]) + duparg("dev", *argv); +- strncpy(ifr.ifr_name, *argv, IFNAMSIZ); ++ if (get_ifname(ifr.ifr_name, *argv)) ++ invarg("\"dev\" not a valid ifname", *argv); + } else { + if (matches(*argv, "address") == 0) { + NEXT_ARG(); +diff --git a/ip/iprule.c b/ip/iprule.c +index e64b4d7db2815..201d3bdc20427 100644 +--- a/ip/iprule.c ++++ b/ip/iprule.c +@@ -472,11 +472,13 @@ static int iprule_list_flush_or_save(int argc, char **argv, 
int action) + } else if (strcmp(*argv, "dev") == 0 || + strcmp(*argv, "iif") == 0) { + NEXT_ARG(); +- strncpy(filter.iif, *argv, IFNAMSIZ); ++ if (get_ifname(filter.iif, *argv)) ++ invarg("\"iif\"/\"dev\" not a valid ifname", *argv); + filter.iifmask = 1; + } else if (strcmp(*argv, "oif") == 0) { + NEXT_ARG(); +- strncpy(filter.oif, *argv, IFNAMSIZ); ++ if (get_ifname(filter.oif, *argv)) ++ invarg("\"oif\" not a valid ifname", *argv); + filter.oifmask = 1; + } else if (strcmp(*argv, "l3mdev") == 0) { + filter.l3mdev = 1; +@@ -695,10 +697,14 @@ static int iprule_modify(int cmd, int argc, char **argv) + } else if (strcmp(*argv, "dev") == 0 || + strcmp(*argv, "iif") == 0) { + NEXT_ARG(); ++ if (check_ifname(*argv)) ++ invarg("\"iif\"/\"dev\" not a valid ifname", *argv); + addattr_l(&req.n, sizeof(req), FRA_IFNAME, + *argv, strlen(*argv)+1); + } else if (strcmp(*argv, "oif") == 0) { + NEXT_ARG(); ++ if (check_ifname(*argv)) ++ invarg("\"oif\" not a valid ifname", *argv); + addattr_l(&req.n, sizeof(req), FRA_OIFNAME, + *argv, strlen(*argv)+1); + } else if (strcmp(*argv, "l3mdev") == 0) { +diff --git a/ip/iptunnel.c b/ip/iptunnel.c +index 0acfd0793d3cd..208a1f06ab12f 100644 +--- a/ip/iptunnel.c ++++ b/ip/iptunnel.c +@@ -178,7 +178,8 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p) + + if (p->name[0]) + duparg2("name", *argv); +- strncpy(p->name, *argv, IFNAMSIZ - 1); ++ if (get_ifname(p->name, *argv)) ++ invarg("\"name\" not a valid ifname", *argv); + if (cmd == SIOCCHGTUNNEL && count == 0) { + struct ip_tunnel_parm old_p = {}; + +@@ -487,6 +488,8 @@ static int do_prl(int argc, char **argv) + count++; + } else if (strcmp(*argv, "dev") == 0) { + NEXT_ARG(); ++ if (check_ifname(*argv)) ++ invarg("\"dev\" not a valid ifname", *argv); + medium = *argv; + } else { + fprintf(stderr, +@@ -534,6 +537,8 @@ static int do_6rd(int argc, char **argv) + cmd = SIOCDEL6RD; + } else if (strcmp(*argv, "dev") == 0) { + NEXT_ARG(); ++ if 
(check_ifname(*argv)) ++ invarg("\"dev\" not a valid ifname", *argv); + medium = *argv; + } else { + fprintf(stderr, +diff --git a/ip/iptuntap.c b/ip/iptuntap.c +index 451f7f0eac6bb..b46e452f21278 100644 +--- a/ip/iptuntap.c ++++ b/ip/iptuntap.c +@@ -176,7 +176,8 @@ static int parse_args(int argc, char **argv, + ifr->ifr_flags |= IFF_MULTI_QUEUE; + } else if (matches(*argv, "dev") == 0) { + NEXT_ARG(); +- strncpy(ifr->ifr_name, *argv, IFNAMSIZ-1); ++ if (get_ifname(ifr->ifr_name, *argv)) ++ invarg("\"dev\" not a valid ifname", *argv); + } else { + if (matches(*argv, "name") == 0) { + NEXT_ARG(); +@@ -184,7 +185,8 @@ static int parse_args(int argc, char **argv, + usage(); + if (ifr->ifr_name[0]) + duparg2("name", *argv); +- strncpy(ifr->ifr_name, *argv, IFNAMSIZ); ++ if (get_ifname(ifr->ifr_name, *argv)) ++ invarg("\"name\" not a valid ifname", *argv); + } + count++; + argc--; argv++; +diff --git a/lib/utils.c b/lib/utils.c +index 228d97bfe5e9b..0c56f0b478f23 100644 +--- a/lib/utils.c ++++ b/lib/utils.c +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -697,6 +698,34 @@ void duparg2(const char *key, const char *arg) + exit(-1); + } + ++int check_ifname(const char *name) ++{ ++ /* These checks mimic kernel checks in dev_valid_name */ ++ if (*name == '\0') ++ return -1; ++ if (strlen(name) >= IFNAMSIZ) ++ return -1; ++ ++ while (*name) { ++ if (*name == '/' || isspace(*name)) ++ return -1; ++ ++name; ++ } ++ return 0; ++} ++ ++/* buf is assumed to be IFNAMSIZ */ ++int get_ifname(char *buf, const char *name) ++{ ++ int ret; ++ ++ ret = check_ifname(name); ++ if (ret == 0) ++ strncpy(buf, name, IFNAMSIZ); ++ ++ return ret; ++} ++ + int matches(const char *cmd, const char *pattern) + { + int len = strlen(cmd); +diff --git a/misc/arpd.c b/misc/arpd.c +index c9d86475e5995..67d86b67957b8 100644 +--- a/misc/arpd.c ++++ b/misc/arpd.c +@@ -662,7 +662,8 @@ int main(int argc, char **argv) + struct ifreq ifr = {}; + + for (i = 0; 
i < ifnum; i++) { +- strncpy(ifr.ifr_name, ifnames[i], IFNAMSIZ); ++ if (get_ifname(ifr.ifr_name, ifnames[i])) ++ invarg("not a valid ifname", ifnames[i]); + if (ioctl(udp_sock, SIOCGIFINDEX, &ifr)) { + perror("ioctl(SIOCGIFINDEX)"); + exit(-1); +diff --git a/tc/f_flower.c b/tc/f_flower.c +index 34249254603ff..f3f8d3427c761 100644 +--- a/tc/f_flower.c ++++ b/tc/f_flower.c +@@ -643,6 +643,8 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, + flags |= TCA_CLS_FLAGS_SKIP_SW; + } else if (matches(*argv, "indev") == 0) { + NEXT_ARG(); ++ if (check_ifname(*argv)) ++ invarg("\"indev\" not a valid ifname", *argv); + addattrstrz(n, MAX_MSG, TCA_FLOWER_INDEV, *argv); + } else if (matches(*argv, "vlan_id") == 0) { + __u16 vid; +-- +2.20.1 + diff --git a/SOURCES/0127-bpf-minor-cleanups-for-bpf_trace_pipe.patch b/SOURCES/0127-bpf-minor-cleanups-for-bpf_trace_pipe.patch new file mode 100644 index 0000000..a96efb9 --- /dev/null +++ b/SOURCES/0127-bpf-minor-cleanups-for-bpf_trace_pipe.patch @@ -0,0 +1,73 @@ +From edf0ae950c5b9d3c5eed29a40f5669cf657995e6 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Tue, 30 Apr 2019 15:43:03 +0200 +Subject: [PATCH] bpf: minor cleanups for bpf_trace_pipe + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465646 +Upstream Status: iproute2.git commit 1b736dc469dca + +commit 1b736dc469dcabd4180848a1f1b3d1fef2b84dbc +Author: Daniel Borkmann +Date: Tue Sep 5 02:24:31 2017 +0200 + + bpf: minor cleanups for bpf_trace_pipe + + Just minor nits, e.g. no need to fflush() and instead of returning + right away, just break and close the fd. 
+ + Signed-off-by: Daniel Borkmann +--- + lib/bpf.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +diff --git a/lib/bpf.c b/lib/bpf.c +index e072cba214067..f0e6c6fb732ee 100644 +--- a/lib/bpf.c ++++ b/lib/bpf.c +@@ -461,9 +461,9 @@ int bpf_trace_pipe(void) + "/trace", + 0, + }; ++ int fd_in, fd_out = STDERR_FILENO; + char tpipe[PATH_MAX]; + const char *mnt; +- int fd; + + mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt, + sizeof(tracefs_mnt), tracefs_known_mnts); +@@ -474,8 +474,8 @@ int bpf_trace_pipe(void) + + snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt); + +- fd = open(tpipe, O_RDONLY); +- if (fd < 0) ++ fd_in = open(tpipe, O_RDONLY); ++ if (fd_in < 0) + return -1; + + fprintf(stderr, "Running! Hang up with ^C!\n\n"); +@@ -483,15 +483,14 @@ int bpf_trace_pipe(void) + static char buff[4096]; + ssize_t ret; + +- ret = read(fd, buff, sizeof(buff) - 1); +- if (ret > 0) { +- if (write(STDERR_FILENO, buff, ret) != ret) +- return -1; +- fflush(stderr); +- } ++ ret = read(fd_in, buff, sizeof(buff)); ++ if (ret > 0 && write(fd_out, buff, ret) == ret) ++ continue; ++ break; + } + +- return 0; ++ close(fd_in); ++ return -1; + } + + static int bpf_gen_global(const char *bpf_sub_dir) +-- +2.20.1 + diff --git a/SOURCES/0128-ip-tunnel-Use-tnl_parse_key-to-parse-tunnel-key.patch b/SOURCES/0128-ip-tunnel-Use-tnl_parse_key-to-parse-tunnel-key.patch new file mode 100644 index 0000000..537029f --- /dev/null +++ b/SOURCES/0128-ip-tunnel-Use-tnl_parse_key-to-parse-tunnel-key.patch @@ -0,0 +1,339 @@ +From 8f2338a51859158b8699e0736f84ab1e42a3da97 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 5 Jun 2019 13:05:04 +0200 +Subject: [PATCH] ip/tunnel: Use tnl_parse_key() to parse tunnel key + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1714660 +Upstream Status: iproute2.git commit 1f44b93744f11 +Conflicts: context change due to missing commit 2a80154fde40b + ("vti6: fix local/remote any addr handling") + +commit 
1f44b93744f11f2a8249e3c13751ab7debebaa5f +Author: Serhey Popovych +Date: Mon Dec 18 19:48:03 2017 +0200 + + ip/tunnel: Use tnl_parse_key() to parse tunnel key + + It is added with + commit a7ed1520ee96 ("ip/tunnel: introduce tnl_parse_key()") + to avoid code duplication in ip6?tunnel.c. + + Reuse it for gre/gre6 and vti/vti6 tunnel rtnl + configuration interface with the same purpose + it is used in tunnel ioctl interface in ip6?tunnel.c. + + While there change type of key variables from + unsigned integer to __be32 to reflect nature of the + value they store and place error message in + tnl_parse_key() on a single line to make single + call to fprintf(). + + Signed-off-by: Serhey Popovych + Signed-off-by: Stephen Hemminger +--- + ip/link_gre.c | 45 +++++---------------------------------------- + ip/link_gre6.c | 45 +++++---------------------------------------- + ip/link_vti.c | 45 +++++---------------------------------------- + ip/link_vti6.c | 45 +++++---------------------------------------- + ip/tunnel.c | 5 +++-- + 5 files changed, 23 insertions(+), 162 deletions(-) + +diff --git a/ip/link_gre.c b/ip/link_gre.c +index ced993692e6f6..1376d2e3af7de 100644 +--- a/ip/link_gre.c ++++ b/ip/link_gre.c +@@ -77,8 +77,8 @@ static int gre_parse_opt(struct link_util *lu, int argc, char **argv, + struct rtattr *greinfo[IFLA_GRE_MAX + 1]; + __u16 iflags = 0; + __u16 oflags = 0; +- unsigned int ikey = 0; +- unsigned int okey = 0; ++ __be32 ikey = 0; ++ __be32 okey = 0; + unsigned int saddr = 0; + unsigned int daddr = 0; + unsigned int link = 0; +@@ -167,53 +167,18 @@ get_failed: + + while (argc > 0) { + if (!matches(*argv, "key")) { +- unsigned int uval; +- + NEXT_ARG(); + iflags |= GRE_KEY; + oflags |= GRE_KEY; +- if (strchr(*argv, '.')) +- uval = get_addr32(*argv); +- else { +- if (get_unsigned(&uval, *argv, 0) < 0) { +- fprintf(stderr, +- "Invalid value for \"key\": \"%s\"; it should be an unsigned integer\n", *argv); +- exit(-1); +- } +- uval = htonl(uval); +- } +- +- 
ikey = okey = uval; ++ ikey = okey = tnl_parse_key("key", *argv); + } else if (!matches(*argv, "ikey")) { +- unsigned int uval; +- + NEXT_ARG(); + iflags |= GRE_KEY; +- if (strchr(*argv, '.')) +- uval = get_addr32(*argv); +- else { +- if (get_unsigned(&uval, *argv, 0) < 0) { +- fprintf(stderr, "invalid value for \"ikey\": \"%s\"; it should be an unsigned integer\n", *argv); +- exit(-1); +- } +- uval = htonl(uval); +- } +- ikey = uval; ++ ikey = tnl_parse_key("ikey", *argv); + } else if (!matches(*argv, "okey")) { +- unsigned int uval; +- + NEXT_ARG(); + oflags |= GRE_KEY; +- if (strchr(*argv, '.')) +- uval = get_addr32(*argv); +- else { +- if (get_unsigned(&uval, *argv, 0) < 0) { +- fprintf(stderr, "invalid value for \"okey\": \"%s\"; it should be an unsigned integer\n", *argv); +- exit(-1); +- } +- uval = htonl(uval); +- } +- okey = uval; ++ okey = tnl_parse_key("okey", *argv); + } else if (!matches(*argv, "seq")) { + iflags |= GRE_SEQ; + oflags |= GRE_SEQ; +diff --git a/ip/link_gre6.c b/ip/link_gre6.c +index a9d18ee954641..22e6e44aae29b 100644 +--- a/ip/link_gre6.c ++++ b/ip/link_gre6.c +@@ -89,8 +89,8 @@ static int gre_parse_opt(struct link_util *lu, int argc, char **argv, + struct rtattr *greinfo[IFLA_GRE_MAX + 1]; + __u16 iflags = 0; + __u16 oflags = 0; +- unsigned int ikey = 0; +- unsigned int okey = 0; ++ __be32 ikey = 0; ++ __be32 okey = 0; + struct in6_addr raddr = IN6ADDR_ANY_INIT; + struct in6_addr laddr = IN6ADDR_ANY_INIT; + unsigned int link = 0; +@@ -181,53 +181,18 @@ get_failed: + + while (argc > 0) { + if (!matches(*argv, "key")) { +- unsigned int uval; +- + NEXT_ARG(); + iflags |= GRE_KEY; + oflags |= GRE_KEY; +- if (strchr(*argv, '.')) +- uval = get_addr32(*argv); +- else { +- if (get_unsigned(&uval, *argv, 0) < 0) { +- fprintf(stderr, +- "Invalid value for \"key\"\n"); +- exit(-1); +- } +- uval = htonl(uval); +- } +- +- ikey = okey = uval; ++ ikey = okey = tnl_parse_key("key", *argv); + } else if (!matches(*argv, "ikey")) { +- unsigned int uval; 
+- + NEXT_ARG(); + iflags |= GRE_KEY; +- if (strchr(*argv, '.')) +- uval = get_addr32(*argv); +- else { +- if (get_unsigned(&uval, *argv, 0) < 0) { +- fprintf(stderr, "invalid value of \"ikey\"\n"); +- exit(-1); +- } +- uval = htonl(uval); +- } +- ikey = uval; ++ ikey = tnl_parse_key("ikey", *argv); + } else if (!matches(*argv, "okey")) { +- unsigned int uval; +- + NEXT_ARG(); + oflags |= GRE_KEY; +- if (strchr(*argv, '.')) +- uval = get_addr32(*argv); +- else { +- if (get_unsigned(&uval, *argv, 0) < 0) { +- fprintf(stderr, "invalid value of \"okey\"\n"); +- exit(-1); +- } +- uval = htonl(uval); +- } +- okey = uval; ++ okey = tnl_parse_key("okey", *argv); + } else if (!matches(*argv, "seq")) { + iflags |= GRE_SEQ; + oflags |= GRE_SEQ; +diff --git a/ip/link_vti.c b/ip/link_vti.c +index d2aacbe78ded1..6e4234170bb50 100644 +--- a/ip/link_vti.c ++++ b/ip/link_vti.c +@@ -62,8 +62,8 @@ static int vti_parse_opt(struct link_util *lu, int argc, char **argv, + struct rtattr *tb[IFLA_MAX + 1]; + struct rtattr *linkinfo[IFLA_INFO_MAX+1]; + struct rtattr *vtiinfo[IFLA_VTI_MAX + 1]; +- unsigned int ikey = 0; +- unsigned int okey = 0; ++ __be32 ikey = 0; ++ __be32 okey = 0; + unsigned int saddr = 0; + unsigned int daddr = 0; + unsigned int link = 0; +@@ -116,49 +116,14 @@ get_failed: + + while (argc > 0) { + if (!matches(*argv, "key")) { +- unsigned int uval; +- + NEXT_ARG(); +- if (strchr(*argv, '.')) +- uval = get_addr32(*argv); +- else { +- if (get_unsigned(&uval, *argv, 0) < 0) { +- fprintf(stderr, +- "Invalid value for \"key\": \"%s\"; it should be an unsigned integer\n", *argv); +- exit(-1); +- } +- uval = htonl(uval); +- } +- +- ikey = okey = uval; ++ ikey = okey = tnl_parse_key("key", *argv); + } else if (!matches(*argv, "ikey")) { +- unsigned int uval; +- + NEXT_ARG(); +- if (strchr(*argv, '.')) +- uval = get_addr32(*argv); +- else { +- if (get_unsigned(&uval, *argv, 0) < 0) { +- fprintf(stderr, "invalid value for \"ikey\": \"%s\"; it should be an unsigned integer\n", 
*argv); +- exit(-1); +- } +- uval = htonl(uval); +- } +- ikey = uval; ++ ikey = tnl_parse_key("ikey", *argv); + } else if (!matches(*argv, "okey")) { +- unsigned int uval; +- + NEXT_ARG(); +- if (strchr(*argv, '.')) +- uval = get_addr32(*argv); +- else { +- if (get_unsigned(&uval, *argv, 0) < 0) { +- fprintf(stderr, "invalid value for \"okey\": \"%s\"; it should be an unsigned integer\n", *argv); +- exit(-1); +- } +- uval = htonl(uval); +- } +- okey = uval; ++ okey = tnl_parse_key("okey", *argv); + } else if (!matches(*argv, "remote")) { + NEXT_ARG(); + if (!strcmp(*argv, "any")) { +diff --git a/ip/link_vti6.c b/ip/link_vti6.c +index aedfbeaeea0e1..e246cedbcb7a7 100644 +--- a/ip/link_vti6.c ++++ b/ip/link_vti6.c +@@ -59,8 +59,8 @@ static int vti6_parse_opt(struct link_util *lu, int argc, char **argv, + struct rtattr *vtiinfo[IFLA_VTI_MAX + 1]; + struct in6_addr saddr; + struct in6_addr daddr; +- unsigned int ikey = 0; +- unsigned int okey = 0; ++ __be32 ikey = 0; ++ __be32 okey = 0; + unsigned int link = 0; + int len; + +@@ -111,49 +111,14 @@ get_failed: + + while (argc > 0) { + if (!matches(*argv, "key")) { +- unsigned int uval; +- + NEXT_ARG(); +- if (strchr(*argv, '.')) +- uval = get_addr32(*argv); +- else { +- if (get_unsigned(&uval, *argv, 0) < 0) { +- fprintf(stderr, +- "Invalid value for \"key\": \"%s\"; it should be an unsigned integer\n", *argv); +- exit(-1); +- } +- uval = htonl(uval); +- } +- +- ikey = okey = uval; ++ ikey = okey = tnl_parse_key("key", *argv); + } else if (!matches(*argv, "ikey")) { +- unsigned int uval; +- + NEXT_ARG(); +- if (strchr(*argv, '.')) +- uval = get_addr32(*argv); +- else { +- if (get_unsigned(&uval, *argv, 0) < 0) { +- fprintf(stderr, "invalid value for \"ikey\": \"%s\"; it should be an unsigned integer\n", *argv); +- exit(-1); +- } +- uval = htonl(uval); +- } +- ikey = uval; ++ ikey = tnl_parse_key("ikey", *argv); + } else if (!matches(*argv, "okey")) { +- unsigned int uval; +- + NEXT_ARG(); +- if (strchr(*argv, '.')) +- 
uval = get_addr32(*argv); +- else { +- if (get_unsigned(&uval, *argv, 0) < 0) { +- fprintf(stderr, "invalid value for \"okey\": \"%s\"; it should be an unsigned integer\n", *argv); +- exit(-1); +- } +- uval = htonl(uval); +- } +- okey = uval; ++ okey = tnl_parse_key("okey", *argv); + } else if (!matches(*argv, "remote")) { + NEXT_ARG(); + if (!strcmp(*argv, "any")) { +diff --git a/ip/tunnel.c b/ip/tunnel.c +index 7956d71aa7334..3967d5df3ca1c 100644 +--- a/ip/tunnel.c ++++ b/ip/tunnel.c +@@ -189,8 +189,9 @@ __be32 tnl_parse_key(const char *name, const char *key) + return get_addr32(key); + + if (get_unsigned(&uval, key, 0) < 0) { +- fprintf(stderr, "invalid value for \"%s\": \"%s\";", name, key); +- fprintf(stderr, " it should be an unsigned integer\n"); ++ fprintf(stderr, ++ "invalid value for \"%s\": \"%s\"; it should be an unsigned integer\n", ++ name, key); + exit(-1); + } + return htonl(uval); +-- +2.20.1 + diff --git a/SOURCES/0129-man-ip-link-document-GRE-tunnels.patch b/SOURCES/0129-man-ip-link-document-GRE-tunnels.patch new file mode 100644 index 0000000..76b36b3 --- /dev/null +++ b/SOURCES/0129-man-ip-link-document-GRE-tunnels.patch @@ -0,0 +1,218 @@ +From 266b19dec4b79c4f63118dd6151c1b0a80f521f7 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 5 Jun 2019 13:08:00 +0200 +Subject: [PATCH] man: ip link: document GRE tunnels + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1714660 +Upstream Status: iproute2.git commit d21c028cf7414 +Conflicts: context change due to missing commit 1eccc5734148c + ("ip: add vxcan/veth to ip-link man page") + +commit d21c028cf74147360c530a4c53063bbe677dbe73 +Author: Sabrina Dubroca +Date: Fri Apr 20 10:31:59 2018 +0200 + + man: ip link: document GRE tunnels + + GRE tunnels are currently only documented together with IPIP and SIT + tunnels, but they actually have very different configuration + options. Let's separate them. 
+ + Signed-off-by: Sabrina Dubroca + Signed-off-by: David Ahern +--- + man/man8/ip-link.8.in | 152 ++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 148 insertions(+), 4 deletions(-) + +diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in +index 48417dbce80aa..cfea1bdfdc030 100644 +--- a/man/man8/ip-link.8.in ++++ b/man/man8/ip-link.8.in +@@ -643,15 +643,88 @@ keyword. + .in -8 + + .TP +-GRE, IPIP, SIT Type Support +-For a link of types +-.I GRE/IPIP/SIT ++IPIP, SIT Type Support ++For a link of type ++.IR IPIP or SIT ++the following additional arguments are supported: ++ ++.BI "ip link add " DEVICE ++.BR type " { " ipip " | " sit " }" ++.BI " remote " ADDR " local " ADDR ++[ ++.BR encap " { " fou " | " gue " | " none " }" ++] [ ++.BR encap-sport " { " \fIPORT " | " auto " }" ++] [ ++.BI "encap-dport " PORT ++] [ ++.RB [ no ] encap-csum ++] [ ++.RB [ no ] encap-remcsum ++] ++ ++.in +8 ++.sp ++.BI remote " ADDR " ++- specifies the remote address of the tunnel. ++ ++.sp ++.BI local " ADDR " ++- specifies the fixed local address for tunneled packets. ++It must be an address on another interface on this host. ++ ++.sp ++.BR encap " { " fou " | " gue " | " none " }" ++- specifies type of secondary UDP encapsulation. "fou" indicates ++Foo-Over-UDP, "gue" indicates Generic UDP Encapsulation. ++ ++.sp ++.BR encap-sport " { " \fIPORT " | " auto " }" ++- specifies the source port in UDP encapsulation. ++.IR PORT ++indicates the port by number, "auto" ++indicates that the port number should be chosen automatically ++(the kernel picks a flow based on the flow hash of the ++encapsulated packet). ++ ++.sp ++.RB [ no ] encap-csum ++- specifies if UDP checksums are enabled in the secondary ++encapsulation. ++ ++.sp ++.RB [ no ] encap-remcsum ++- specifies if Remote Checksum Offload is enabled. This is only ++applicable for Generic UDP Encapsulation. 
++ ++.in -8 ++.TP ++GRE Type Support ++For a link of type ++.IR GRE " or " GRETAP + the following additional arguments are supported: + + .BI "ip link add " DEVICE +-.BR type " { " gre " | " ipip " | " sit " }" ++.BR type " { " gre " | " gretap " }" + .BI " remote " ADDR " local " ADDR + [ ++.RB [ i | o ] seq ++] [ ++.RB [ i | o ] key ++.I KEY ++] [ ++.RB [ i | o ] csum ++] [ ++.BI ttl " TTL " ++] [ ++.BI tos " TOS " ++] [ ++.RB [ no ] pmtudisc ++] [ ++.RB [ no ] ignore-df ++] [ ++.BI dev " PHYS_DEV " ++] [ + .BR encap " { " fou " | " gue " | " none " }" + ] [ + .BR encap-sport " { " \fIPORT " | " auto " }" +@@ -661,6 +734,8 @@ the following additional arguments are supported: + .RB [ no ] encap-csum + ] [ + .RB [ no ] encap-remcsum ++] [ ++.BR external + ] + + .in +8 +@@ -673,6 +748,70 @@ the following additional arguments are supported: + - specifies the fixed local address for tunneled packets. + It must be an address on another interface on this host. + ++.sp ++.RB [ i | o ] seq ++- serialize packets. ++The ++.B oseq ++flag enables sequencing of outgoing packets. ++The ++.B iseq ++flag requires that all input packets are serialized. ++ ++.sp ++.RB [ i | o ] key ++.I KEY ++- use keyed GRE with key ++.IR KEY ". "KEY ++is either a number or an IPv4 address-like dotted quad. ++The ++.B key ++parameter specifies the same key to use in both directions. ++The ++.BR ikey " and " okey ++parameters specify different keys for input and output. ++ ++.sp ++.RB [ i | o ] csum ++- generate/require checksums for tunneled packets. ++The ++.B ocsum ++flag calculates checksums for outgoing packets. ++The ++.B icsum ++flag requires that all input packets have the correct ++checksum. The ++.B csum ++flag is equivalent to the combination ++.B "icsum ocsum" . ++ ++.sp ++.BI ttl " TTL" ++- specifies the TTL value to use in outgoing packets. ++ ++.sp ++.BI tos " TOS" ++- specifies the TOS value to use in outgoing packets. 
++ ++.sp ++.RB [ no ] pmtudisc ++- enables/disables Path MTU Discovery on this tunnel. ++It is enabled by default. Note that a fixed ttl is incompatible ++with this option: tunneling with a fixed ttl always makes pmtu ++discovery. ++ ++.sp ++.RB [ no ] ignore-df ++- enables/disables IPv4 DF suppression on this tunnel. ++Normally datagrams that exceed the MTU will be fragmented; the presence ++of the DF flag inhibits this, resulting instead in an ICMP Unreachable ++(Fragmentation Required) message. Enabling this attribute casues the ++DF flag to be ignored. ++ ++.sp ++.BI dev " PHYS_DEV" ++- specifies the physical device to use for tunnel endpoint communication. ++ + .sp + .BR encap " { " fou " | " gue " | " none " }" + - specifies type of secondary UDP encapsulation. "fou" indicates +@@ -697,6 +836,11 @@ encapsulation. + - specifies if Remote Checksum Offload is enabled. This is only + applicable for Generic UDP Encapsulation. + ++.sp ++.BR external ++- make this tunnel externally controlled ++.RB "(e.g. " "ip route encap" ). 
++ + .in -8 + + .TP +-- +2.20.1 + diff --git a/SOURCES/0130-gre-gre6-allow-clearing-i-o-key-seq-csum-flags.patch b/SOURCES/0130-gre-gre6-allow-clearing-i-o-key-seq-csum-flags.patch new file mode 100644 index 0000000..2d4af18 --- /dev/null +++ b/SOURCES/0130-gre-gre6-allow-clearing-i-o-key-seq-csum-flags.patch @@ -0,0 +1,268 @@ +From 24eec64aa52b65b606d8cc0b03619f3974f12484 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 5 Jun 2019 13:08:41 +0200 +Subject: [PATCH] gre/gre6: allow clearing {,i,o}{key,seq,csum} flags + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1714660 +Upstream Status: iproute2.git commit 7f520601f59ee +Conflicts: context change on ip/link_gre?.c due to missing commit + ae91205c4d2a7 ("gre/gre6: Unify gre_print_help()") + +commit 7f520601f59ee35da2fc48b3f1b39ed2b80c9efa +Author: Sabrina Dubroca +Date: Fri Apr 20 10:32:00 2018 +0200 + + gre/gre6: allow clearing {,i,o}{key,seq,csum} flags + + Currently, iproute allows setting those flags, but it's impossible to + clear them, since their current value is fetched from the kernel and + then we OR in the additional flags passed on the command line. + + Add no* variants to allow clearing them. + + Signed-off-by: Sabrina Dubroca + Signed-off-by: David Ahern +--- + ip/link_gre.c | 30 +++++++++++++++++++++++++++--- + ip/link_gre6.c | 30 +++++++++++++++++++++++++++--- + man/man8/ip-link.8.in | 27 ++++++++++++++++++--------- + 3 files changed, 72 insertions(+), 15 deletions(-) + +diff --git a/ip/link_gre.c b/ip/link_gre.c +index 1376d2e3af7de..41e2edbedb6eb 100644 +--- a/ip/link_gre.c ++++ b/ip/link_gre.c +@@ -28,9 +28,9 @@ static void print_usage(FILE *f) + fprintf(f, + "Usage: ... 
{ gre | gretap } [ remote ADDR ]\n" + " [ local ADDR ]\n" +- " [ [i|o]seq ]\n" +- " [ [i|o]key KEY ]\n" +- " [ [i|o]csum ]\n" ++ " [ [no][i|o]seq ]\n" ++ " [ [i|o]key KEY | no[i|o]key ]\n" ++ " [ [no][i|o]csum ]\n" + " [ ttl TTL ]\n" + " [ tos TOS ]\n" + " [ [no]pmtudisc ]\n" +@@ -171,28 +171,52 @@ get_failed: + iflags |= GRE_KEY; + oflags |= GRE_KEY; + ikey = okey = tnl_parse_key("key", *argv); ++ } else if (!matches(*argv, "nokey")) { ++ iflags &= ~GRE_KEY; ++ oflags &= ~GRE_KEY; ++ ikey = okey = 0; + } else if (!matches(*argv, "ikey")) { + NEXT_ARG(); + iflags |= GRE_KEY; + ikey = tnl_parse_key("ikey", *argv); ++ } else if (!matches(*argv, "noikey")) { ++ iflags &= ~GRE_KEY; ++ ikey = 0; + } else if (!matches(*argv, "okey")) { + NEXT_ARG(); + oflags |= GRE_KEY; + okey = tnl_parse_key("okey", *argv); ++ } else if (!matches(*argv, "nookey")) { ++ oflags &= ~GRE_KEY; ++ okey = 0; + } else if (!matches(*argv, "seq")) { + iflags |= GRE_SEQ; + oflags |= GRE_SEQ; ++ } else if (!matches(*argv, "noseq")) { ++ iflags &= ~GRE_SEQ; ++ oflags &= ~GRE_SEQ; + } else if (!matches(*argv, "iseq")) { + iflags |= GRE_SEQ; ++ } else if (!matches(*argv, "noiseq")) { ++ iflags &= ~GRE_SEQ; + } else if (!matches(*argv, "oseq")) { + oflags |= GRE_SEQ; ++ } else if (!matches(*argv, "nooseq")) { ++ oflags &= ~GRE_SEQ; + } else if (!matches(*argv, "csum")) { + iflags |= GRE_CSUM; + oflags |= GRE_CSUM; ++ } else if (!matches(*argv, "nocsum")) { ++ iflags &= ~GRE_CSUM; ++ oflags &= ~GRE_CSUM; + } else if (!matches(*argv, "icsum")) { + iflags |= GRE_CSUM; ++ } else if (!matches(*argv, "noicsum")) { ++ iflags &= ~GRE_CSUM; + } else if (!matches(*argv, "ocsum")) { + oflags |= GRE_CSUM; ++ } else if (!matches(*argv, "noocsum")) { ++ oflags &= ~GRE_CSUM; + } else if (!matches(*argv, "nopmtudisc")) { + pmtudisc = 0; + } else if (!matches(*argv, "pmtudisc")) { +diff --git a/ip/link_gre6.c b/ip/link_gre6.c +index 22e6e44aae29b..127e51de4ab73 100644 +--- a/ip/link_gre6.c ++++ b/ip/link_gre6.c +@@ 
-35,9 +35,9 @@ static void print_usage(FILE *f) + fprintf(f, + "Usage: ... { ip6gre | ip6gretap } [ remote ADDR ]\n" + " [ local ADDR ]\n" +- " [ [i|o]seq ]\n" +- " [ [i|o]key KEY ]\n" +- " [ [i|o]csum ]\n" ++ " [ [no][i|o]seq ]\n" ++ " [ [i|o]key KEY | no[i|o]key ]\n" ++ " [ [no][i|o]csum ]\n" + " [ hoplimit TTL ]\n" + " [ encaplimit ELIM ]\n" + " [ tclass TCLASS ]\n" +@@ -185,28 +185,52 @@ get_failed: + iflags |= GRE_KEY; + oflags |= GRE_KEY; + ikey = okey = tnl_parse_key("key", *argv); ++ } else if (!matches(*argv, "nokey")) { ++ iflags &= ~GRE_KEY; ++ oflags &= ~GRE_KEY; ++ ikey = okey = 0; + } else if (!matches(*argv, "ikey")) { + NEXT_ARG(); + iflags |= GRE_KEY; + ikey = tnl_parse_key("ikey", *argv); ++ } else if (!matches(*argv, "noikey")) { ++ iflags &= ~GRE_KEY; ++ ikey = 0; + } else if (!matches(*argv, "okey")) { + NEXT_ARG(); + oflags |= GRE_KEY; + okey = tnl_parse_key("okey", *argv); ++ } else if (!matches(*argv, "nookey")) { ++ oflags &= ~GRE_KEY; ++ okey = 0; + } else if (!matches(*argv, "seq")) { + iflags |= GRE_SEQ; + oflags |= GRE_SEQ; ++ } else if (!matches(*argv, "noseq")) { ++ iflags &= ~GRE_SEQ; ++ oflags &= ~GRE_SEQ; + } else if (!matches(*argv, "iseq")) { + iflags |= GRE_SEQ; ++ } else if (!matches(*argv, "noiseq")) { ++ iflags &= ~GRE_SEQ; + } else if (!matches(*argv, "oseq")) { + oflags |= GRE_SEQ; ++ } else if (!matches(*argv, "nooseq")) { ++ oflags &= ~GRE_SEQ; + } else if (!matches(*argv, "csum")) { + iflags |= GRE_CSUM; + oflags |= GRE_CSUM; ++ } else if (!matches(*argv, "nocsum")) { ++ iflags &= ~GRE_CSUM; ++ oflags &= ~GRE_CSUM; + } else if (!matches(*argv, "icsum")) { + iflags |= GRE_CSUM; ++ } else if (!matches(*argv, "noicsum")) { ++ iflags &= ~GRE_CSUM; + } else if (!matches(*argv, "ocsum")) { + oflags |= GRE_CSUM; ++ } else if (!matches(*argv, "noocsum")) { ++ oflags &= ~GRE_CSUM; + } else if (!matches(*argv, "remote")) { + inet_prefix addr; + +diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in +index 
cfea1bdfdc030..8be5d5e1e9fd6 100644 +--- a/man/man8/ip-link.8.in ++++ b/man/man8/ip-link.8.in +@@ -708,12 +708,14 @@ the following additional arguments are supported: + .BR type " { " gre " | " gretap " }" + .BI " remote " ADDR " local " ADDR + [ +-.RB [ i | o ] seq ++.RB [ no ] "" [ i | o ] seq + ] [ + .RB [ i | o ] key + .I KEY ++| ++.BR no [ i | o ] key + ] [ +-.RB [ i | o ] csum ++.RB [ no ] "" [ i | o ] csum + ] [ + .BI ttl " TTL " + ] [ +@@ -749,7 +751,7 @@ the following additional arguments are supported: + It must be an address on another interface on this host. + + .sp +-.RB [ i | o ] seq ++.RB [ no ] "" [ i | o ] seq + - serialize packets. + The + .B oseq +@@ -761,6 +763,8 @@ flag requires that all input packets are serialized. + .sp + .RB [ i | o ] key + .I KEY ++| ++.BR no [ i | o ] key + - use keyed GRE with key + .IR KEY ". "KEY + is either a number or an IPv4 address-like dotted quad. +@@ -772,7 +776,7 @@ The + parameters specify different keys for input and output. + + .sp +-.RB [ i | o ] csum ++.RB [ no ] "" [ i | o ] csum + - generate/require checksums for tunneled packets. + The + .B ocsum +@@ -853,12 +857,14 @@ the following additional arguments are supported: + .BR type " { " ip6gre " | " ip6gretap " }" + .BI remote " ADDR " local " ADDR" + [ +-.RB [ i | o ] seq ++.RB [ no ] "" [ i | o ] seq + ] [ + .RB [ i | o ] key + .I KEY ++| ++.BR no [ i | o ] key + ] [ +-.RB [ i | o ] csum ++.RB [ no ] "" [ i | o ] csum + ] [ + .BI hoplimit " TTL " + ] [ +@@ -884,7 +890,7 @@ the following additional arguments are supported: + It must be an address on another interface on this host. + + .sp +-.RB [ i | o ] seq ++.RB [ no ] "" [ i | o ] seq + - serialize packets. + The + .B oseq +@@ -894,7 +900,10 @@ The + flag requires that all input packets are serialized. + + .sp +-.RB [ i | o ] key " \fIKEY" ++.RB [ i | o ] key ++.I KEY ++| ++.BR no [ i | o ] key + - use keyed GRE with key + .IR KEY ". "KEY + is either a number or an IPv4 address-like dotted quad. 
+@@ -906,7 +915,7 @@ The + parameters specify different keys for input and output. + + .sp +-.RB [ i | o ] csum ++.RB [ no ] "" [ i | o ] csum + - generate/require checksums for tunneled packets. + The + .B ocsum +-- +2.20.1 + diff --git a/SOURCES/0131-tc_filter-add-support-for-chain-index.patch b/SOURCES/0131-tc_filter-add-support-for-chain-index.patch new file mode 100644 index 0000000..4fe63f0 --- /dev/null +++ b/SOURCES/0131-tc_filter-add-support-for-chain-index.patch @@ -0,0 +1,250 @@ +From 55c511b5caab0bfb9997bca9031947a45fe7854b Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 5 Jun 2019 13:09:39 +0200 +Subject: [PATCH] tc_filter: add support for chain index + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1714660 +Upstream Status: iproute2.git commit 732f03461bc48 + +commit 732f03461bc48cf94946ee3cc92ab5832862b989 +Author: Jiri Pirko +Date: Tue May 16 19:29:35 2017 +0200 + + tc_filter: add support for chain index + + Allow user to put filter to a specific chain identified by index. 
+ + Signed-off-by: Jiri Pirko +--- + tc/tc_filter.c | 87 +++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 72 insertions(+), 15 deletions(-) + +diff --git a/tc/tc_filter.c b/tc/tc_filter.c +index a6bb73d12eaba..8dbebf1ffa32a 100644 +--- a/tc/tc_filter.c ++++ b/tc/tc_filter.c +@@ -31,7 +31,7 @@ static void usage(void) + fprintf(stderr, + "Usage: tc filter [ add | del | change | replace | show ] dev STRING\n" + "Usage: tc filter get dev STRING parent CLASSID protocol PROTO handle FILTERID pref PRIO FILTER_TYPE\n" +- " [ pref PRIO ] protocol PROTO\n" ++ " [ pref PRIO ] protocol PROTO [ chain CHAIN_INDEX ]\n" + " [ estimator INTERVAL TIME_CONSTANT ]\n" + " [ root | ingress | egress | parent CLASSID ]\n" + " [ handle FILTERID ] [ [ FILTER_TYPE ] [ help | OPTIONS ] ]\n" +@@ -59,6 +59,8 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv) + __u32 prio = 0; + __u32 protocol = 0; + int protocol_set = 0; ++ __u32 chain_index; ++ int chain_index_set = 0; + char *fhandle = NULL; + char d[16] = {}; + char k[16] = {}; +@@ -127,6 +129,13 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv) + invarg("invalid protocol", *argv); + protocol = id; + protocol_set = 1; ++ } else if (matches(*argv, "chain") == 0) { ++ NEXT_ARG(); ++ if (chain_index_set) ++ duparg("chain", *argv); ++ if (get_u32(&chain_index, *argv, 0)) ++ invarg("invalid chain index value", *argv); ++ chain_index_set = 1; + } else if (matches(*argv, "estimator") == 0) { + if (parse_estimator(&argc, &argv, &est) < 0) + return -1; +@@ -146,6 +155,9 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv) + + req.t.tcm_info = TC_H_MAKE(prio<<16, protocol); + ++ if (chain_index_set) ++ addattr32(&req.n, sizeof(req), TCA_CHAIN, chain_index); ++ + if (k[0]) + addattr_l(&req.n, sizeof(req), TCA_KIND, k, strlen(k)+1); + +@@ -167,6 +179,7 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv) + 
return -1; + } + } ++ + if (est.ewma_log) + addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est)); + +@@ -193,6 +206,8 @@ static __u32 filter_parent; + static int filter_ifindex; + static __u32 filter_prio; + static __u32 filter_protocol; ++static __u32 filter_chain_index; ++static int filter_chain_index_set; + __u16 f_proto; + + int print_filter(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) +@@ -270,6 +285,15 @@ int print_filter(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) + } + } + fprintf(fp, "%s ", rta_getattr_str(tb[TCA_KIND])); ++ ++ if (tb[TCA_CHAIN]) { ++ __u32 chain_index = rta_getattr_u32(tb[TCA_CHAIN]); ++ ++ if (!filter_chain_index_set || ++ filter_chain_index != chain_index) ++ fprintf(fp, "chain %u ", chain_index); ++ } ++ + q = get_filter_kind(RTA_DATA(tb[TCA_KIND])); + if (tb[TCA_OPTIONS]) { + if (q) +@@ -312,6 +336,8 @@ static int tc_filter_get(int cmd, unsigned int flags, int argc, char **argv) + __u32 prio = 0; + __u32 protocol = 0; + int protocol_set = 0; ++ __u32 chain_index; ++ int chain_index_set = 0; + __u32 parent_handle = 0; + char *fhandle = NULL; + char d[16] = {}; +@@ -376,6 +402,13 @@ static int tc_filter_get(int cmd, unsigned int flags, int argc, char **argv) + invarg("invalid protocol", *argv); + protocol = id; + protocol_set = 1; ++ } else if (matches(*argv, "chain") == 0) { ++ NEXT_ARG(); ++ if (chain_index_set) ++ duparg("chain", *argv); ++ if (get_u32(&chain_index, *argv, 0)) ++ invarg("invalid chain index value", *argv); ++ chain_index_set = 1; + } else if (matches(*argv, "help") == 0) { + usage(); + return 0; +@@ -405,6 +438,9 @@ static int tc_filter_get(int cmd, unsigned int flags, int argc, char **argv) + + req.t.tcm_info = TC_H_MAKE(prio<<16, protocol); + ++ if (chain_index_set) ++ addattr32(&req.n, sizeof(req), TCA_CHAIN, chain_index); ++ + if (req.t.tcm_parent == TC_H_UNSPEC) { + fprintf(stderr, "Must specify filter parent\n"); + return -1; +@@ -462,10 +498,20 @@ static int 
tc_filter_get(int cmd, unsigned int flags, int argc, char **argv) + + static int tc_filter_list(int argc, char **argv) + { +- struct tcmsg t = { .tcm_family = AF_UNSPEC }; ++ struct { ++ struct nlmsghdr n; ++ struct tcmsg t; ++ char buf[MAX_MSG]; ++ } req = { ++ .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), ++ .n.nlmsg_type = RTM_GETTFILTER, ++ .t.tcm_parent = TC_H_UNSPEC, ++ .t.tcm_family = AF_UNSPEC, ++ }; + char d[16] = {}; + __u32 prio = 0; + __u32 protocol = 0; ++ __u32 chain_index; + char *fhandle = NULL; + + while (argc > 0) { +@@ -475,39 +521,39 @@ static int tc_filter_list(int argc, char **argv) + duparg("dev", *argv); + strncpy(d, *argv, sizeof(d)-1); + } else if (strcmp(*argv, "root") == 0) { +- if (t.tcm_parent) { ++ if (req.t.tcm_parent) { + fprintf(stderr, + "Error: \"root\" is duplicate parent ID\n"); + return -1; + } +- filter_parent = t.tcm_parent = TC_H_ROOT; ++ filter_parent = req.t.tcm_parent = TC_H_ROOT; + } else if (strcmp(*argv, "ingress") == 0) { +- if (t.tcm_parent) { ++ if (req.t.tcm_parent) { + fprintf(stderr, + "Error: \"ingress\" is duplicate parent ID\n"); + return -1; + } + filter_parent = TC_H_MAKE(TC_H_CLSACT, + TC_H_MIN_INGRESS); +- t.tcm_parent = filter_parent; ++ req.t.tcm_parent = filter_parent; + } else if (strcmp(*argv, "egress") == 0) { +- if (t.tcm_parent) { ++ if (req.t.tcm_parent) { + fprintf(stderr, + "Error: \"egress\" is duplicate parent ID\n"); + return -1; + } + filter_parent = TC_H_MAKE(TC_H_CLSACT, + TC_H_MIN_EGRESS); +- t.tcm_parent = filter_parent; ++ req.t.tcm_parent = filter_parent; + } else if (strcmp(*argv, "parent") == 0) { + __u32 handle; + + NEXT_ARG(); +- if (t.tcm_parent) ++ if (req.t.tcm_parent) + duparg("parent", *argv); + if (get_tc_classid(&handle, *argv)) + invarg("invalid parent ID", *argv); +- filter_parent = t.tcm_parent = handle; ++ filter_parent = req.t.tcm_parent = handle; + } else if (strcmp(*argv, "handle") == 0) { + NEXT_ARG(); + if (fhandle) +@@ -531,6 +577,14 @@ static int 
tc_filter_list(int argc, char **argv) + invarg("invalid protocol", *argv); + protocol = res; + filter_protocol = protocol; ++ } else if (matches(*argv, "chain") == 0) { ++ NEXT_ARG(); ++ if (filter_chain_index_set) ++ duparg("chain", *argv); ++ if (get_u32(&chain_index, *argv, 0)) ++ invarg("invalid chain index value", *argv); ++ filter_chain_index_set = 1; ++ filter_chain_index = chain_index; + } else if (matches(*argv, "help") == 0) { + usage(); + } else { +@@ -543,20 +597,23 @@ static int tc_filter_list(int argc, char **argv) + argc--; argv++; + } + +- t.tcm_info = TC_H_MAKE(prio<<16, protocol); ++ req.t.tcm_info = TC_H_MAKE(prio<<16, protocol); + + ll_init_map(&rth); + + if (d[0]) { +- t.tcm_ifindex = ll_name_to_index(d); +- if (t.tcm_ifindex == 0) { ++ req.t.tcm_ifindex = ll_name_to_index(d); ++ if (req.t.tcm_ifindex == 0) { + fprintf(stderr, "Cannot find device \"%s\"\n", d); + return 1; + } +- filter_ifindex = t.tcm_ifindex; ++ filter_ifindex = req.t.tcm_ifindex; + } + +- if (rtnl_dump_request(&rth, RTM_GETTFILTER, &t, sizeof(t)) < 0) { ++ if (filter_chain_index_set) ++ addattr32(&req.n, sizeof(req), TCA_CHAIN, chain_index); ++ ++ if (rtnl_dump_request_n(&rth, &req.n) < 0) { + perror("Cannot send dump request"); + return 1; + } +-- +2.20.1 + diff --git a/SOURCES/0132-tc-actions-add-helpers-to-parse-and-print-control-ac.patch b/SOURCES/0132-tc-actions-add-helpers-to-parse-and-print-control-ac.patch new file mode 100644 index 0000000..7d1a3b4 --- /dev/null +++ b/SOURCES/0132-tc-actions-add-helpers-to-parse-and-print-control-ac.patch @@ -0,0 +1,772 @@ +From 23f1822fa8129326de4709d643f41cf26b6bae88 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 5 Jun 2019 13:09:39 +0200 +Subject: [PATCH] tc: actions: add helpers to parse and print control actions + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1714660 +Upstream Status: iproute2.git commit e67aba5595811 +Conflicts: context change due to out-of-order cherry-pick of + commit 73aa988868e7e 
("tc/m_gact: Drop dead code") + +commit e67aba559581143f9bc34f0706b0c3feeeab08fa +Author: Jiri Pirko +Date: Tue May 16 19:29:36 2017 +0200 + + tc: actions: add helpers to parse and print control actions + + Each tc action is terminated by a control action. Each action parses and + prints then intividually. Introduce set of helpers and allow to share + this code. + + Signed-off-by: Jiri Pirko +--- + tc/m_bpf.c | 8 +-- + tc/m_connmark.c | 4 +- + tc/m_csum.c | 9 ++- + tc/m_gact.c | 45 ++++----------- + tc/m_ife.c | 10 ++-- + tc/m_mirred.c | 10 ++-- + tc/m_nat.c | 11 ++-- + tc/m_pedit.c | 8 +-- + tc/m_police.c | 50 +++++------------ + tc/m_sample.c | 4 +- + tc/m_simple.c | 3 - + tc/m_skbedit.c | 7 +-- + tc/m_skbmod.c | 30 +--------- + tc/m_tunnel_key.c | 8 +-- + tc/m_vlan.c | 9 ++- + tc/tc_util.c | 137 +++++++++++++++++++++++++++++++++++++++++++++- + tc/tc_util.h | 11 +++- + 17 files changed, 209 insertions(+), 155 deletions(-) + +diff --git a/tc/m_bpf.c b/tc/m_bpf.c +index 1ddc334f2f21b..57283030a35f5 100644 +--- a/tc/m_bpf.c ++++ b/tc/m_bpf.c +@@ -75,7 +75,7 @@ static int bpf_parse_opt(struct action_util *a, int *ptr_argc, char ***ptr_argv, + int tca_id, struct nlmsghdr *n) + { + const char *bpf_obj = NULL, *bpf_uds_name = NULL; +- struct tc_act_bpf parm = { .action = TC_ACT_PIPE }; ++ struct tc_act_bpf parm = {}; + struct bpf_cfg_in cfg = {}; + bool seen_run = false; + struct rtattr *tail; +@@ -123,8 +123,8 @@ opt_bpf: + NEXT_ARG_FWD(); + } + +- if (argc && !action_a2n(*argv, &parm.action, false)) +- NEXT_ARG_FWD(); ++ parse_action_control_dflt(&argc, &argv, &parm.action, ++ false, TC_ACT_PIPE); + + if (argc) { + if (matches(*argv, "index") == 0) { +@@ -186,7 +186,7 @@ static int bpf_print_opt(struct action_util *au, FILE *f, struct rtattr *arg) + b, sizeof(b))); + } + +- fprintf(f, "default-action %s\n", action_n2a(parm->action)); ++ print_action_control(f, "default-action ", parm->action, "\n"); + fprintf(f, "\tindex %u ref %d bind %d", parm->index, parm->refcnt, 
+ parm->bindcnt); + +diff --git a/tc/m_connmark.c b/tc/m_connmark.c +index 295f90d52eefd..3c2274bc0d2af 100644 +--- a/tc/m_connmark.c ++++ b/tc/m_connmark.c +@@ -80,9 +80,7 @@ parse_connmark(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, + } + } + +- sel.action = TC_ACT_PIPE; +- if (argc && !action_a2n(*argv, &sel.action, false)) +- NEXT_ARG_FWD(); ++ parse_action_control_dflt(&argc, &argv, &sel.action, false, TC_ACT_PIPE); + + if (argc) { + if (matches(*argv, "index") == 0) { +diff --git a/tc/m_csum.c b/tc/m_csum.c +index 0ee8cad3fbe4c..7b156734f64c5 100644 +--- a/tc/m_csum.c ++++ b/tc/m_csum.c +@@ -123,8 +123,7 @@ parse_csum(struct action_util *a, int *argc_p, + return -1; + } + +- if (argc && !action_a2n(*argv, &sel.action, false)) +- NEXT_ARG_FWD(); ++ parse_action_control_dflt(&argc, &argv, &sel.action, false, TC_ACT_OK); + + if (argc) { + if (matches(*argv, "index") == 0) { +@@ -200,10 +199,10 @@ print_csum(struct action_util *au, FILE *f, struct rtattr *arg) + uflag_1 = "?empty"; + } + +- fprintf(f, "csum (%s%s%s%s%s%s%s) action %s\n", ++ fprintf(f, "csum (%s%s%s%s%s%s%s) ", + uflag_1, uflag_2, uflag_3, +- uflag_4, uflag_5, uflag_6, uflag_7, +- action_n2a(sel->action)); ++ uflag_4, uflag_5, uflag_6, uflag_7); ++ print_action_control(f, "action ", sel->action, "\n"); + fprintf(f, "\tindex %u ref %d bind %d", sel->index, sel->refcnt, + sel->bindcnt); + +diff --git a/tc/m_gact.c b/tc/m_gact.c +index 0cb5222fd3817..bc3860bbe4441 100644 +--- a/tc/m_gact.c ++++ b/tc/m_gact.c +@@ -68,26 +68,13 @@ usage(void) + exit(-1); + } + +-static int +-get_act(char ***argv_p) +-{ +- int n; +- +- if (action_a2n(**argv_p, &n, false)) { +- fprintf(stderr, "bad action type %s\n", **argv_p); +- return -10; +- } +- return n; +-} +- + static int + parse_gact(struct action_util *a, int *argc_p, char ***argv_p, + int tca_id, struct nlmsghdr *n) + { + int argc = *argc_p; + char **argv = *argv_p; +- int action = TC_POLICE_RECLASSIFY; +- struct tc_gact p = { .action = 
TC_POLICE_RECLASSIFY }; ++ struct tc_gact p = { 0 }; + #ifdef CONFIG_GACT_PROB + int rd = 0; + struct tc_gact_p pp; +@@ -101,16 +88,8 @@ parse_gact(struct action_util *a, int *argc_p, char ***argv_p, + if (matches(*argv, "gact") == 0) { + argc--; + argv++; +- } else { +- action = get_act(&argv); +- if (action != -10) { +- p.action = action; +- argc--; +- argv++; +- } else { +- explain(); +- return action; +- } ++ } else if (parse_action_control(&argc, &argv, &p.action, false) == -1) { ++ usage(); + } + + #ifdef CONFIG_GACT_PROB +@@ -129,13 +108,9 @@ parse_gact(struct action_util *a, int *argc_p, char ***argv_p, + return -1; + } + +- action = get_act(&argv); +- if (action != -10) { /* FIXME */ +- pp.paction = action; +- } else { +- explain(); +- return -1; +- } ++ if (parse_action_control(&argc, &argv, ++ &pp.paction, false) == -1) ++ usage(); + argc--; + argv++; + if (get_u16(&pp.pval, *argv, 10)) { +@@ -204,7 +179,8 @@ print_gact(struct action_util *au, FILE * f, struct rtattr *arg) + } + p = RTA_DATA(tb[TCA_GACT_PARMS]); + +- fprintf(f, "gact action %s", action_n2a(p->action)); ++ fprintf(f, "gact "); ++ print_action_control(f, "action ", p->action, ""); + #ifdef CONFIG_GACT_PROB + if (tb[TCA_GACT_PROB] != NULL) { + pp = RTA_DATA(tb[TCA_GACT_PROB]); +@@ -213,8 +189,9 @@ print_gact(struct action_util *au, FILE * f, struct rtattr *arg) + memset(&pp_dummy, 0, sizeof(pp_dummy)); + pp = &pp_dummy; + } +- fprintf(f, "\n\t random type %s %s val %d", +- prob_n2a(pp->ptype), action_n2a(pp->paction), pp->pval); ++ fprintf(f, "\n\t random type %s", prob_n2a(pp->ptype)); ++ print_action_control(f, " ", pp->paction, " "); ++ fprintf(f, "val %d", pp->pval); + #endif + fprintf(f, "\n\t index %u ref %d bind %d", p->index, p->refcnt, + p->bindcnt); +diff --git a/tc/m_ife.c b/tc/m_ife.c +index f6131b1332324..e3521e62c178c 100644 +--- a/tc/m_ife.c ++++ b/tc/m_ife.c +@@ -57,7 +57,7 @@ static int parse_ife(struct action_util *a, int *argc_p, char ***argv_p, + int argc = *argc_p; + 
char **argv = *argv_p; + int ok = 0; +- struct tc_ife p = { .action = TC_ACT_PIPE }; /* good default */ ++ struct tc_ife p = { 0 }; + struct rtattr *tail; + struct rtattr *tail2; + char dbuf[ETH_ALEN]; +@@ -156,8 +156,7 @@ static int parse_ife(struct action_util *a, int *argc_p, char ***argv_p, + argv++; + } + +- if (argc && !action_a2n(*argv, &p.action, false)) +- NEXT_ARG_FWD(); ++ parse_action_control_dflt(&argc, &argv, &p.action, false, TC_ACT_PIPE); + + if (argc) { + if (matches(*argv, "index") == 0) { +@@ -245,9 +244,8 @@ static int print_ife(struct action_util *au, FILE *f, struct rtattr *arg) + } + p = RTA_DATA(tb[TCA_IFE_PARMS]); + +- fprintf(f, "ife %s action %s ", +- (p->flags & IFE_ENCODE) ? "encode" : "decode", +- action_n2a(p->action)); ++ fprintf(f, "ife %s ", p->flags & IFE_ENCODE ? "encode" : "decode"); ++ print_action_control(f, "action ", p->action, " "); + + if (tb[TCA_IFE_TYPE]) { + ife_type = rta_getattr_u16(tb[TCA_IFE_TYPE]); +diff --git a/tc/m_mirred.c b/tc/m_mirred.c +index e9438904fdf50..2384bda1ff045 100644 +--- a/tc/m_mirred.c ++++ b/tc/m_mirred.c +@@ -170,10 +170,8 @@ parse_direction(struct action_util *a, int *argc_p, char ***argv_p, + } + + +- if (argc && +- (p.eaction == TCA_EGRESS_MIRROR || p.eaction == TCA_INGRESS_MIRROR) +- && !action_a2n(*argv, &p.action, false)) +- NEXT_ARG(); ++ if (p.eaction == TCA_EGRESS_MIRROR || p.eaction == TCA_INGRESS_MIRROR) ++ parse_action_control(&argc, &argv, &p.action, false); + + if (argc) { + if (iok && matches(*argv, "index") == 0) { +@@ -272,8 +270,8 @@ print_mirred(struct action_util *au, FILE * f, struct rtattr *arg) + return -1; + } + +- fprintf(f, "mirred (%s to device %s) %s", +- mirred_n2a(p->eaction), dev, action_n2a(p->action)); ++ fprintf(f, "mirred (%s to device %s)", mirred_n2a(p->eaction), dev); ++ print_action_control(f, " ", p->action, ""); + + fprintf(f, "\n "); + fprintf(f, "\tindex %u ref %d bind %d", p->index, p->refcnt, +diff --git a/tc/m_nat.c b/tc/m_nat.c +index 
525f185e2c082..31b68fb6bd784 100644 +--- a/tc/m_nat.c ++++ b/tc/m_nat.c +@@ -115,8 +115,7 @@ parse_nat(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, struct + return -1; + } + +- if (argc && !action_a2n(*argv, &sel.action, false)) +- NEXT_ARG_FWD(); ++ parse_action_control_dflt(&argc, &argv, &sel.action, false, TC_ACT_OK); + + if (argc) { + if (matches(*argv, "index") == 0) { +@@ -164,12 +163,12 @@ print_nat(struct action_util *au, FILE * f, struct rtattr *arg) + len = ffs(sel->mask); + len = len ? 33 - len : 0; + +- fprintf(f, " nat %s %s/%d %s %s", sel->flags & TCA_NAT_FLAG_EGRESS ? +- "egress" : "ingress", ++ fprintf(f, " nat %s %s/%d %s", sel->flags & TCA_NAT_FLAG_EGRESS ? ++ "egress" : "ingress", + format_host_r(AF_INET, 4, &sel->old_addr, buf1, sizeof(buf1)), + len, +- format_host_r(AF_INET, 4, &sel->new_addr, buf2, sizeof(buf2)), +- action_n2a(sel->action)); ++ format_host_r(AF_INET, 4, &sel->new_addr, buf2, sizeof(buf2))); ++ print_action_control(f, " ", sel->action, ""); + + if (show_stats) { + if (tb[TCA_NAT_TM]) { +diff --git a/tc/m_pedit.c b/tc/m_pedit.c +index dfa6b2c4835e9..b7d26b4540beb 100644 +--- a/tc/m_pedit.c ++++ b/tc/m_pedit.c +@@ -670,8 +670,7 @@ int parse_pedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, + return -1; + } + +- if (argc && !action_a2n(*argv, &sel.sel.action, false)) +- NEXT_ARG(); ++ parse_action_control_dflt(&argc, &argv, &sel.sel.action, false, TC_ACT_OK); + + if (argc) { + if (matches(*argv, "index") == 0) { +@@ -776,8 +775,9 @@ int print_pedit(struct action_util *au, FILE *f, struct rtattr *arg) + } + } + +- fprintf(f, " pedit action %s keys %d\n ", +- action_n2a(sel->action), sel->nkeys); ++ fprintf(f, " pedit "); ++ print_action_control(f, "action ", sel->action, " "); ++ fprintf(f,"keys %d\n ", sel->nkeys); + fprintf(f, "\t index %u ref %d bind %d", sel->index, sel->refcnt, + sel->bindcnt); + +diff --git a/tc/m_police.c b/tc/m_police.c +index 226e20e4e8005..2b73969de5daf 100644 +--- 
a/tc/m_police.c ++++ b/tc/m_police.c +@@ -50,27 +50,6 @@ static void explain1(char *arg) + fprintf(stderr, "Illegal \"%s\"\n", arg); + } + +-static int get_police_result(int *action, int *result, char *arg) +-{ +- char *p = strchr(arg, '/'); +- +- if (p) +- *p = 0; +- +- if (action_a2n(arg, action, true)) { +- if (p) +- *p = '/'; +- return -1; +- } +- +- if (p) { +- *p = '/'; +- if (action_a2n(p+1, result, true)) +- return -1; +- } +- return 0; +-} +- + int act_parse_police(struct action_util *a, int *argc_p, char ***argv_p, + int tca_id, struct nlmsghdr *n) + { +@@ -166,23 +145,19 @@ int act_parse_police(struct action_util *a, int *argc_p, char ***argv_p, + explain1("peakrate"); + return -1; + } +- } else if (matches(*argv, "reclassify") == 0) { +- p.action = TC_POLICE_RECLASSIFY; +- } else if (matches(*argv, "drop") == 0 || +- matches(*argv, "shot") == 0) { +- p.action = TC_POLICE_SHOT; +- } else if (matches(*argv, "continue") == 0) { +- p.action = TC_POLICE_UNSPEC; +- } else if (matches(*argv, "pass") == 0) { +- p.action = TC_POLICE_OK; +- } else if (matches(*argv, "pipe") == 0) { +- p.action = TC_POLICE_PIPE; ++ } else if (matches(*argv, "reclassify") == 0 || ++ matches(*argv, "drop") == 0 || ++ matches(*argv, "shot") == 0 || ++ matches(*argv, "continue") == 0 || ++ matches(*argv, "pass") == 0 || ++ matches(*argv, "pipe") == 0) { ++ if (parse_action_control(&argc, &argv, &p.action, false)) ++ return -1; + } else if (strcmp(*argv, "conform-exceed") == 0) { + NEXT_ARG(); +- if (get_police_result(&p.action, &presult, *argv)) { +- fprintf(stderr, "Illegal \"action\"\n"); ++ if (parse_action_control_slash(&argc, &argv, &p.action, ++ &presult, true)) + return -1; +- } + } else if (matches(*argv, "overhead") == 0) { + NEXT_ARG(); + if (get_u16(&overhead, *argv, 10)) { +@@ -318,12 +293,13 @@ int print_police(struct action_util *a, FILE *f, struct rtattr *arg) + fprintf(f, "avrate %s ", + sprint_rate(rta_getattr_u32(tb[TCA_POLICE_AVRATE]), + b1)); +- fprintf(f, "action 
%s", action_n2a(p->action)); ++ ++ print_action_control(f, "action ", p->action, ""); + + if (tb[TCA_POLICE_RESULT]) { + __u32 action = rta_getattr_u32(tb[TCA_POLICE_RESULT]); + +- fprintf(f, "/%s ", action_n2a(action)); ++ print_action_control(f, "/", action, " "); + } else + fprintf(f, " "); + +diff --git a/tc/m_sample.c b/tc/m_sample.c +index 9291109071a89..ff5ee6bd1ef63 100644 +--- a/tc/m_sample.c ++++ b/tc/m_sample.c +@@ -98,9 +98,7 @@ static int parse_sample(struct action_util *a, int *argc_p, char ***argv_p, + NEXT_ARG_FWD(); + } + +- p.action = TC_ACT_PIPE; +- if (argc && !action_a2n(*argv, &p.action, false)) +- NEXT_ARG_FWD(); ++ parse_action_control_dflt(&argc, &argv, &p.action, false, TC_ACT_PIPE); + + if (argc) { + if (matches(*argv, "index") == 0) { +diff --git a/tc/m_simple.c b/tc/m_simple.c +index 65e48addf161b..f8937bcabb7ae 100644 +--- a/tc/m_simple.c ++++ b/tc/m_simple.c +@@ -120,9 +120,6 @@ parse_simple(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, + } + } + +- if (argc && !action_a2n(*argv, &sel.action, false)) +- NEXT_ARG_FWD(); +- + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); +diff --git a/tc/m_skbedit.c b/tc/m_skbedit.c +index 638715f679d37..aa374fcb33ed9 100644 +--- a/tc/m_skbedit.c ++++ b/tc/m_skbedit.c +@@ -120,9 +120,8 @@ parse_skbedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, + argv++; + } + +- sel.action = TC_ACT_PIPE; +- if (argc && !action_a2n(*argv, &sel.action, false)) +- NEXT_ARG(); ++ parse_action_control_dflt(&argc, &argv, &sel.action, ++ false, TC_ACT_PIPE); + + if (argc) { + if (matches(*argv, "index") == 0) { +@@ -214,7 +213,7 @@ static int print_skbedit(struct action_util *au, FILE *f, struct rtattr *arg) + fprintf(f, " ptype %d", *ptype); + } + +- fprintf(f, " %s", action_n2a(p->action)); ++ print_action_control(f, " ", p->action, ""); + + fprintf(f, "\n\t index %u ref %d bind %d", + p->index, p->refcnt, p->bindcnt); +diff --git a/tc/m_skbmod.c b/tc/m_skbmod.c 
+index acb7771d2901b..1ccd474309348 100644 +--- a/tc/m_skbmod.c ++++ b/tc/m_skbmod.c +@@ -61,7 +61,6 @@ static int parse_skbmod(struct action_util *a, int *argc_p, char ***argv_p, + char *saddr = NULL; + + memset(&p, 0, sizeof(p)); +- p.action = TC_ACT_PIPE; /* good default */ + + if (argc <= 0) + return -1; +@@ -123,31 +122,7 @@ static int parse_skbmod(struct action_util *a, int *argc_p, char ***argv_p, + argv++; + } + +- if (argc) { +- if (matches(*argv, "reclassify") == 0) { +- p.action = TC_ACT_RECLASSIFY; +- argc--; +- argv++; +- } else if (matches(*argv, "pipe") == 0) { +- p.action = TC_ACT_PIPE; +- argc--; +- argv++; +- } else if (matches(*argv, "drop") == 0 || +- matches(*argv, "shot") == 0) { +- p.action = TC_ACT_SHOT; +- argc--; +- argv++; +- } else if (matches(*argv, "continue") == 0) { +- p.action = TC_ACT_UNSPEC; +- argc--; +- argv++; +- } else if (matches(*argv, "pass") == 0 || +- matches(*argv, "ok") == 0) { +- p.action = TC_ACT_OK; +- argc--; +- argv++; +- } +- } ++ parse_action_control_dflt(&argc, &argv, &p.action, false, TC_ACT_PIPE); + + if (argc) { + if (matches(*argv, "index") == 0) { +@@ -206,7 +181,8 @@ static int print_skbmod(struct action_util *au, FILE *f, struct rtattr *arg) + + p = RTA_DATA(tb[TCA_SKBMOD_PARMS]); + +- fprintf(f, "skbmod action %s ", action_n2a(p->action)); ++ fprintf(f, "skbmod "); ++ print_action_control(f, "", p->action, " "); + + if (tb[TCA_SKBMOD_ETYPE]) { + skbmod_etype = rta_getattr_u16(tb[TCA_SKBMOD_ETYPE]); +diff --git a/tc/m_tunnel_key.c b/tc/m_tunnel_key.c +index 60fd1c464e531..cdde64a15b929 100644 +--- a/tc/m_tunnel_key.c ++++ b/tc/m_tunnel_key.c +@@ -99,7 +99,7 @@ static int tunnel_key_parse_tos_ttl(char *str, int type, struct nlmsghdr *n) + static int parse_tunnel_key(struct action_util *a, int *argc_p, char ***argv_p, + int tca_id, struct nlmsghdr *n) + { +- struct tc_tunnel_key parm = { .action = TC_ACT_PIPE }; ++ struct tc_tunnel_key parm = {}; + char **argv = *argv_p; + int argc = *argc_p; + struct 
rtattr *tail; +@@ -194,8 +194,8 @@ static int parse_tunnel_key(struct action_util *a, int *argc_p, char ***argv_p, + NEXT_ARG_FWD(); + } + +- if (argc && !action_a2n(*argv, &parm.action, false)) +- NEXT_ARG_FWD(); ++ parse_action_control_dflt(&argc, &argv, &parm.action, ++ false, TC_ACT_PIPE); + + if (argc) { + if (matches(*argv, "index") == 0) { +@@ -318,7 +318,7 @@ static int print_tunnel_key(struct action_util *au, FILE *f, struct rtattr *arg) + tb[TCA_TUNNEL_KEY_ENC_TTL]); + break; + } +- fprintf(f, " %s", action_n2a(parm->action)); ++ print_action_control(f, " ", parm->action, ""); + + fprintf(f, "\n\tindex %d ref %d bind %d", parm->index, parm->refcnt, + parm->bindcnt); +diff --git a/tc/m_vlan.c b/tc/m_vlan.c +index 44b9375966da3..2441b06847ecd 100644 +--- a/tc/m_vlan.c ++++ b/tc/m_vlan.c +@@ -59,7 +59,7 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p, + int proto_set = 0; + __u8 prio; + int prio_set = 0; +- struct tc_vlan parm = { 0 }; ++ struct tc_vlan parm = {}; + + if (matches(*argv, "vlan") != 0) + return -1; +@@ -133,9 +133,8 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p, + argv++; + } + +- parm.action = TC_ACT_PIPE; +- if (argc && !action_a2n(*argv, &parm.action, false)) +- NEXT_ARG_FWD(); ++ parse_action_control_dflt(&argc, &argv, &parm.action, ++ false, TC_ACT_PIPE); + + if (argc) { + if (matches(*argv, "index") == 0) { +@@ -224,7 +223,7 @@ static int print_vlan(struct action_util *au, FILE *f, struct rtattr *arg) + } + break; + } +- fprintf(f, " %s", action_n2a(parm->action)); ++ print_action_control(f, " ", parm->action, ""); + + fprintf(f, "\n\t index %u ref %d bind %d", parm->index, parm->refcnt, + parm->bindcnt); +diff --git a/tc/tc_util.c b/tc/tc_util.c +index 296825ae174e0..840222832690b 100644 +--- a/tc/tc_util.c ++++ b/tc/tc_util.c +@@ -411,7 +411,7 @@ char *sprint_qdisc_handle(__u32 h, char *buf) + return buf; + } + +-const char *action_n2a(int action) ++static const char 
*action_n2a(int action) + { + static char buf[64]; + +@@ -443,7 +443,7 @@ const char *action_n2a(int action) + * + * In error case, returns -1 and does not touch @result. Otherwise returns 0. + */ +-int action_a2n(char *arg, int *result, bool allow_num) ++static int action_a2n(char *arg, int *result, bool allow_num) + { + int n; + char dummy; +@@ -474,6 +474,139 @@ int action_a2n(char *arg, int *result, bool allow_num) + return 0; + } + ++/* Parse action control including possible options. ++ * ++ * Parameters: ++ * @argc_p - pointer to argc to parse ++ * @argv_p - pointer to argv to parse ++ * @result_p - pointer to output variable ++ * @allow_num - whether action may be in numeric format already ++ * ++ * In error case, returns -1 and does not touch @result_1p. Otherwise returns 0. ++ */ ++int parse_action_control(int *argc_p, char ***argv_p, ++ int *result_p, bool allow_num) ++{ ++ int argc = *argc_p; ++ char **argv = *argv_p; ++ int result; ++ ++ if (!argc) ++ return -1; ++ if (action_a2n(*argv, &result, allow_num) == -1) { ++ fprintf(stderr, "Bad action type %s\n", *argv); ++ return -1; ++ } ++ NEXT_ARG_FWD(); ++ *argc_p = argc; ++ *argv_p = argv; ++ *result_p = result; ++ return 0; ++} ++ ++/* Parse action control including possible options. ++ * ++ * Parameters: ++ * @argc_p - pointer to argc to parse ++ * @argv_p - pointer to argv to parse ++ * @result_p - pointer to output variable ++ * @allow_num - whether action may be in numeric format already ++ * @default_result - set as a result in case of parsing error ++ * ++ * In case there is an error during parsing, the default result is used. 
++ */ ++void parse_action_control_dflt(int *argc_p, char ***argv_p, ++ int *result_p, bool allow_num, ++ int default_result) ++{ ++ if (parse_action_control(argc_p, argv_p, result_p, allow_num)) ++ *result_p = default_result; ++} ++ ++static int parse_action_control_slash_spaces(int *argc_p, char ***argv_p, ++ int *result1_p, int *result2_p, ++ bool allow_num) ++{ ++ int argc = *argc_p; ++ char **argv = *argv_p; ++ int result1, result2; ++ int *result_p = &result1; ++ int ok = 0; ++ int ret; ++ ++ while (argc > 0) { ++ switch (ok) { ++ case 1: ++ if (strcmp(*argv, "/") != 0) ++ goto out; ++ result_p = &result2; ++ NEXT_ARG(); ++ /* fall-through */ ++ case 0: /* fall-through */ ++ case 2: ++ ret = parse_action_control(&argc, &argv, ++ result_p, allow_num); ++ if (ret) ++ return ret; ++ ok++; ++ break; ++ default: ++ goto out; ++ } ++ } ++out: ++ *result1_p = result1; ++ if (ok == 2) ++ *result2_p = result2; ++ *argc_p = argc; ++ *argv_p = argv; ++ return 0; ++} ++ ++/* Parse action control with slash including possible options. ++ * ++ * Parameters: ++ * @argc_p - pointer to argc to parse ++ * @argv_p - pointer to argv to parse ++ * @result1_p - pointer to the first (before slash) output variable ++ * @result2_p - pointer to the second (after slash) output variable ++ * @allow_num - whether action may be in numeric format already ++ * ++ * In error case, returns -1 and does not touch @result*. Otherwise returns 0. 
++ */ ++int parse_action_control_slash(int *argc_p, char ***argv_p, ++ int *result1_p, int *result2_p, bool allow_num) ++{ ++ char **argv = *argv_p; ++ int result1, result2; ++ char *p = strchr(*argv, '/'); ++ ++ if (!p) ++ return parse_action_control_slash_spaces(argc_p, argv_p, ++ result1_p, result2_p, ++ allow_num); ++ *p = 0; ++ if (action_a2n(*argv, &result1, allow_num)) { ++ if (p) ++ *p = '/'; ++ return -1; ++ } ++ ++ *p = '/'; ++ if (action_a2n(p + 1, &result2, allow_num)) ++ return -1; ++ ++ *result1_p = result1; ++ *result2_p = result2; ++ return 0; ++} ++ ++void print_action_control(FILE *f, const char *prefix, ++ int action, const char *suffix) ++{ ++ fprintf(f, "%s%s%s", prefix, action_n2a(action), suffix); ++} ++ + int get_linklayer(unsigned int *val, const char *arg) + { + int res; +diff --git a/tc/tc_util.h b/tc/tc_util.h +index 4db26c6d5e25b..5c54ad384eae6 100644 +--- a/tc/tc_util.h ++++ b/tc/tc_util.h +@@ -100,8 +100,15 @@ char *sprint_tc_classid(__u32 h, char *buf); + int tc_print_police(FILE *f, struct rtattr *tb); + int parse_police(int *argc_p, char ***argv_p, int tca_id, struct nlmsghdr *n); + +-const char *action_n2a(int action); +-int action_a2n(char *arg, int *result, bool allow_num); ++int parse_action_control(int *argc_p, char ***argv_p, ++ int *result_p, bool allow_num); ++void parse_action_control_dflt(int *argc_p, char ***argv_p, ++ int *result_p, bool allow_num, ++ int default_result); ++int parse_action_control_slash(int *argc_p, char ***argv_p, ++ int *result1_p, int *result2_p, bool allow_num); ++void print_action_control(FILE *f, const char *prefix, ++ int action, const char *suffix); + int act_parse_police(struct action_util *a, int *argc_p, + char ***argv_p, int tca_id, struct nlmsghdr *n); + int print_police(struct action_util *a, FILE *f, struct rtattr *tb); +-- +2.20.1 + diff --git a/SOURCES/0133-tc-actions-introduce-support-for-goto-chain-action.patch b/SOURCES/0133-tc-actions-introduce-support-for-goto-chain-action.patch 
new file mode 100644 index 0000000..bd60875 --- /dev/null +++ b/SOURCES/0133-tc-actions-introduce-support-for-goto-chain-action.patch @@ -0,0 +1,247 @@ +From eaccce1b85efafbea1607ff88d7259541f311ee2 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 5 Jun 2019 13:10:31 +0200 +Subject: [PATCH] tc/actions: introduce support for goto chain action + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1714660 +Upstream Status: iproute2.git commit d19f72f7898a7 + +commit d19f72f7898a78ef76628833c204afb96f9a05cd +Author: Jiri Pirko +Date: Tue May 16 19:29:37 2017 +0200 + + tc/actions: introduce support for goto chain action + + Allow user to set control action "goto" with filter chain index as + a parameter. + + Signed-off-by: Jiri Pirko +--- + man/man8/tc-ife.8 | 2 +- + man/man8/tc-pedit.8 | 2 +- + man/man8/tc-police.8 | 2 +- + man/man8/tc-vlan.8 | 2 +- + tc/m_connmark.c | 3 ++- + tc/m_gact.c | 6 ++++-- + tc/m_pedit.c | 3 ++- + tc/m_police.c | 6 ++++-- + tc/m_skbmod.c | 3 ++- + tc/m_vlan.c | 3 ++- + tc/tc_util.c | 24 +++++++++++++++++++++++- + 11 files changed, 43 insertions(+), 13 deletions(-) + +diff --git a/man/man8/tc-ife.8 b/man/man8/tc-ife.8 +index a8f1f287d1502..24595cc6d615c 100644 +--- a/man/man8/tc-ife.8 ++++ b/man/man8/tc-ife.8 +@@ -34,7 +34,7 @@ IFE - encapsulate/decapsulate metadata + + .ti -8 + .IR CONTROL " := { " +-.BR reclassify " | " use " | " pipe " | " drop " | " continue " | " ok " }" ++.BR reclassify " | " use " | " pipe " | " drop " | " continue " | " ok " | " goto " " chain " " CHAIN_INDEX " }" + .SH DESCRIPTION + The + .B ife +diff --git a/man/man8/tc-pedit.8 b/man/man8/tc-pedit.8 +index 82d4217bc9589..bbd725c4d0ba1 100644 +--- a/man/man8/tc-pedit.8 ++++ b/man/man8/tc-pedit.8 +@@ -82,7 +82,7 @@ pedit - generic packet editor action + + .ti -8 + .IR CONTROL " := {" +-.BR reclassify " | " pipe " | " drop " | " shot " | " continue " | " pass " }" ++.BR reclassify " | " pipe " | " drop " | " shot " | " continue " | " pass " | " goto " " 
chain " " CHAIN_INDEX " }" + .SH DESCRIPTION + The + .B pedit +diff --git a/man/man8/tc-police.8 b/man/man8/tc-police.8 +index 620c28813fc7e..bcc5f438825d1 100644 +--- a/man/man8/tc-police.8 ++++ b/man/man8/tc-police.8 +@@ -30,7 +30,7 @@ police - policing action + + .ti -8 + .IR EXCEEDACT/NOTEXCEEDACT " := { " +-.BR pipe " | " ok " | " reclassify " | " drop " | " continue " }" ++.BR pipe " | " ok " | " reclassify " | " drop " | " continue " | " goto " " chain " " CHAIN_INDEX " }" + .SH DESCRIPTION + The + .B police +diff --git a/man/man8/tc-vlan.8 b/man/man8/tc-vlan.8 +index a526f66b60b4c..f5ffc25f054ed 100644 +--- a/man/man8/tc-vlan.8 ++++ b/man/man8/tc-vlan.8 +@@ -26,7 +26,7 @@ vlan - vlan manipulation module + + .ti -8 + .IR CONTROL " := { " +-.BR reclassify " | " pipe " | " drop " | " continue " | " pass " }" ++.BR reclassify " | " pipe " | " drop " | " continue " | " pass " | " goto " " chain " " CHAIN_INDEX " }" + .SH DESCRIPTION + The + .B vlan +diff --git a/tc/m_connmark.c b/tc/m_connmark.c +index 3c2274bc0d2af..37d7185415490 100644 +--- a/tc/m_connmark.c ++++ b/tc/m_connmark.c +@@ -30,7 +30,8 @@ explain(void) + fprintf(stderr, "Usage: ... connmark [zone ZONE] [CONTROL] [index ]\n"); + fprintf(stderr, "where :\n" + "\tZONE is the conntrack zone\n" +- "\tCONTROL := reclassify|pipe|drop|continue|ok\n"); ++ "\tCONTROL := reclassify | pipe | drop | continue | ok |\n" ++ "\t goto chain \n"); + } + + static void +diff --git a/tc/m_gact.c b/tc/m_gact.c +index bc3860bbe4441..c04c00bbded3c 100644 +--- a/tc/m_gact.c ++++ b/tc/m_gact.c +@@ -45,7 +45,8 @@ explain(void) + #ifdef CONFIG_GACT_PROB + fprintf(stderr, "Usage: ... 
gact [RAND] [INDEX]\n"); + fprintf(stderr, +- "Where: \tACTION := reclassify | drop | continue | pass | pipe\n" ++ "Where: \tACTION := reclassify | drop | continue | pass | pipe |\n" ++ " \t goto chain \n" + "\tRAND := random \n" + "\tRANDTYPE := netrand | determ\n" + "\tVAL : = value not exceeding 10000\n" +@@ -54,7 +55,8 @@ explain(void) + #else + fprintf(stderr, "Usage: ... gact [INDEX]\n"); + fprintf(stderr, +- "Where: \tACTION := reclassify | drop | continue | pass | pipe\n" ++ "Where: \tACTION := reclassify | drop | continue | pass | pipe |\n" ++ " \t goto chain \n" + "\tINDEX := index value used\n" + "\n"); + #endif +diff --git a/tc/m_pedit.c b/tc/m_pedit.c +index b7d26b4540beb..5d89ab1d832ab 100644 +--- a/tc/m_pedit.c ++++ b/tc/m_pedit.c +@@ -45,7 +45,8 @@ static void explain(void) + "\t\tCMD:= clear | invert | set | add | retain\n" + "\t:= ip | ip6 \n" + " \t\t| udp | tcp | icmp \n" +- "\tCONTROL:= reclassify | pipe | drop | continue | pass\n" ++ "\tCONTROL:= reclassify | pipe | drop | continue | pass |\n" ++ "\t goto chain \n" + "\tNOTE: if 'ex' is set, extended functionality will be supported (kernel >= 4.11)\n" + "For Example usage look at the examples directory\n"); + +diff --git a/tc/m_police.c b/tc/m_police.c +index 2b73969de5daf..86117db0482ec 100644 +--- a/tc/m_police.c ++++ b/tc/m_police.c +@@ -41,7 +41,8 @@ static void usage(void) + fprintf(stderr, "Where: CONTROL := conform-exceed [/NOTEXCEEDACT]\n"); + fprintf(stderr, " Define how to handle packets which exceed ()\n"); + fprintf(stderr, " or conform () the configured bandwidth limit.\n"); +- fprintf(stderr, " EXCEEDACT/NOTEXCEEDACT := { pipe | ok | reclassify | drop | continue }\n"); ++ fprintf(stderr, " EXCEEDACT/NOTEXCEEDACT := { pipe | ok | reclassify | drop | continue |\n"); ++ fprintf(stderr, " goto chain }\n"); + exit(-1); + } + +@@ -150,7 +151,8 @@ int act_parse_police(struct action_util *a, int *argc_p, char ***argv_p, + matches(*argv, "shot") == 0 || + matches(*argv, "continue") == 0 
|| + matches(*argv, "pass") == 0 || +- matches(*argv, "pipe") == 0) { ++ matches(*argv, "pipe") == 0 || ++ matches(*argv, "goto") == 0) { + if (parse_action_control(&argc, &argv, &p.action, false)) + return -1; + } else if (strcmp(*argv, "conform-exceed") == 0) { +diff --git a/tc/m_skbmod.c b/tc/m_skbmod.c +index 1ccd474309348..ba79308ba8354 100644 +--- a/tc/m_skbmod.c ++++ b/tc/m_skbmod.c +@@ -36,7 +36,8 @@ static void skbmod_explain(void) + "\tDMAC := 6 byte Destination MAC address\n" + "\tSMAC := optional 6 byte Source MAC address\n" + "\tETYPE := optional 16 bit ethertype\n" +- "\tCONTROL := reclassify|pipe|drop|continue|ok\n" ++ "\tCONTROL := reclassify | pipe | drop | continue | ok |\n" ++ "\t goto chain \n" + "\tINDEX := skbmod index value to use\n"); + } + +diff --git a/tc/m_vlan.c b/tc/m_vlan.c +index 2441b06847ecd..cccb4996b05f3 100644 +--- a/tc/m_vlan.c ++++ b/tc/m_vlan.c +@@ -32,7 +32,8 @@ static void explain(void) + fprintf(stderr, " vlan modify [ protocol VLANPROTO ] id VLANID [ priority VLANPRIO ] [CONTROL]\n"); + fprintf(stderr, " VLANPROTO is one of 802.1Q or 802.1AD\n"); + fprintf(stderr, " with default: 802.1Q\n"); +- fprintf(stderr, " CONTROL := reclassify | pipe | drop | continue | pass\n"); ++ fprintf(stderr, " CONTROL := reclassify | pipe | drop | continue | pass |\n"); ++ fprintf(stderr, " goto chain \n"); + } + + static void usage(void) +diff --git a/tc/tc_util.c b/tc/tc_util.c +index 840222832690b..194185a0fa1d8 100644 +--- a/tc/tc_util.c ++++ b/tc/tc_util.c +@@ -415,6 +415,8 @@ static const char *action_n2a(int action) + { + static char buf[64]; + ++ if (TC_ACT_EXT_CMP(action, TC_ACT_GOTO_CHAIN)) ++ return "goto"; + switch (action) { + case TC_ACT_UNSPEC: + return "continue"; +@@ -458,6 +460,7 @@ static int action_a2n(char *arg, int *result, bool allow_num) + {"ok", TC_ACT_OK}, + {"reclassify", TC_ACT_RECLASSIFY}, + {"pipe", TC_ACT_PIPE}, ++ {"goto", TC_ACT_GOTO_CHAIN}, + { NULL }, + }, *iter; + +@@ -497,6 +500,22 @@ int 
parse_action_control(int *argc_p, char ***argv_p, + fprintf(stderr, "Bad action type %s\n", *argv); + return -1; + } ++ if (result == TC_ACT_GOTO_CHAIN) { ++ __u32 chain_index; ++ ++ NEXT_ARG(); ++ if (matches(*argv, "chain") != 0) { ++ fprintf(stderr, "\"chain index\" expected\n"); ++ return -1; ++ } ++ NEXT_ARG(); ++ if (get_u32(&chain_index, *argv, 10) || ++ chain_index > TC_ACT_EXT_VAL_MASK) { ++ fprintf(stderr, "Illegal \"chain index\"\n"); ++ return -1; ++ } ++ result |= chain_index; ++ } + NEXT_ARG_FWD(); + *argc_p = argc; + *argv_p = argv; +@@ -604,7 +623,10 @@ int parse_action_control_slash(int *argc_p, char ***argv_p, + void print_action_control(FILE *f, const char *prefix, + int action, const char *suffix) + { +- fprintf(f, "%s%s%s", prefix, action_n2a(action), suffix); ++ fprintf(f, "%s%s", prefix, action_n2a(action)); ++ if (TC_ACT_EXT_CMP(action, TC_ACT_GOTO_CHAIN)) ++ fprintf(f, " chain %u", action & TC_ACT_EXT_VAL_MASK); ++ fprintf(f, "%s", suffix); + } + + int get_linklayer(unsigned int *val, const char *arg) +-- +2.20.1 + diff --git a/SOURCES/0134-tc-gact-fix-control-action-parsing.patch b/SOURCES/0134-tc-gact-fix-control-action-parsing.patch new file mode 100644 index 0000000..46e953c --- /dev/null +++ b/SOURCES/0134-tc-gact-fix-control-action-parsing.patch @@ -0,0 +1,41 @@ +From 46ce82dd840a158c8fe80842ac808b1df425e216 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 5 Jun 2019 13:10:31 +0200 +Subject: [PATCH] tc: gact: fix control action parsing + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1714660 +Upstream Status: iproute2.git commit 18f05d06016d9 +Conflicts: context change due to out-of-order cherry-pick of + commit 73aa988868e7e ("tc/m_gact: Drop dead code") + +commit 18f05d06016d9492c87fd105d831de0d6d858f43 +Author: Jiri Pirko +Date: Mon Jun 5 16:22:03 2017 +0200 + + tc: gact: fix control action parsing + + parse_action_control helper does advancing of the arg inside. So don't + do it outside. 
+ + Fixes: e67aba559581 ("tc: actions: add helpers to parse and print control actions") + Signed-off-by: Jiri Pirko +--- + tc/m_gact.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/tc/m_gact.c b/tc/m_gact.c +index c04c00bbded3c..73346d4e9333b 100644 +--- a/tc/m_gact.c ++++ b/tc/m_gact.c +@@ -113,8 +113,6 @@ parse_gact(struct action_util *a, int *argc_p, char ***argv_p, + if (parse_action_control(&argc, &argv, + &pp.paction, false) == -1) + usage(); +- argc--; +- argv++; + if (get_u16(&pp.pval, *argv, 10)) { + fprintf(stderr, "Illegal probability val 0x%x\n", pp.pval); + return -1; +-- +2.20.1 + diff --git a/SOURCES/0135-tc-don-t-print-error-message-on-miss-when-parsing-ac.patch b/SOURCES/0135-tc-don-t-print-error-message-on-miss-when-parsing-ac.patch new file mode 100644 index 0000000..3d32538 --- /dev/null +++ b/SOURCES/0135-tc-don-t-print-error-message-on-miss-when-parsing-ac.patch @@ -0,0 +1,97 @@ +From 8efbb8de949eedd4341d075175f932245a9f142c Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 5 Jun 2019 13:11:03 +0200 +Subject: [PATCH] tc: don't print error message on miss when parsing action + with default + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1714660 +Upstream Status: iproute2.git commit c794b7b179026 + +commit c794b7b17902627b19ddc00699d89ea7b6b1edf7 +Author: Jiri Pirko +Date: Thu Jun 15 14:10:51 2017 +0200 + + tc: don't print error message on miss when parsing action with default + + In case default control action parsing takes place, it is ok to miss. + So don't print error message. 
+ + Fixes: e67aba559581 ("tc: actions: add helpers to parse and print control actions") + Reported-by: Jiri Benc + Signed-off-by: Jiri Pirko + Tested-by: Jiri Benc +--- + tc/tc_util.c | 36 ++++++++++++++++++++++-------------- + 1 file changed, 22 insertions(+), 14 deletions(-) + +diff --git a/tc/tc_util.c b/tc/tc_util.c +index 194185a0fa1d8..cdc23477ada53 100644 +--- a/tc/tc_util.c ++++ b/tc/tc_util.c +@@ -477,18 +477,8 @@ static int action_a2n(char *arg, int *result, bool allow_num) + return 0; + } + +-/* Parse action control including possible options. +- * +- * Parameters: +- * @argc_p - pointer to argc to parse +- * @argv_p - pointer to argv to parse +- * @result_p - pointer to output variable +- * @allow_num - whether action may be in numeric format already +- * +- * In error case, returns -1 and does not touch @result_1p. Otherwise returns 0. +- */ +-int parse_action_control(int *argc_p, char ***argv_p, +- int *result_p, bool allow_num) ++static int __parse_action_control(int *argc_p, char ***argv_p, int *result_p, ++ bool allow_num, bool ignore_a2n_miss) + { + int argc = *argc_p; + char **argv = *argv_p; +@@ -497,7 +487,8 @@ int parse_action_control(int *argc_p, char ***argv_p, + if (!argc) + return -1; + if (action_a2n(*argv, &result, allow_num) == -1) { +- fprintf(stderr, "Bad action type %s\n", *argv); ++ if (!ignore_a2n_miss) ++ fprintf(stderr, "Bad action type %s\n", *argv); + return -1; + } + if (result == TC_ACT_GOTO_CHAIN) { +@@ -523,6 +514,23 @@ int parse_action_control(int *argc_p, char ***argv_p, + return 0; + } + ++/* Parse action control including possible options. ++ * ++ * Parameters: ++ * @argc_p - pointer to argc to parse ++ * @argv_p - pointer to argv to parse ++ * @result_p - pointer to output variable ++ * @allow_num - whether action may be in numeric format already ++ * ++ * In error case, returns -1 and does not touch @result_1p. Otherwise returns 0. 
++ */ ++int parse_action_control(int *argc_p, char ***argv_p, ++ int *result_p, bool allow_num) ++{ ++ return __parse_action_control(argc_p, argv_p, result_p, ++ allow_num, false); ++} ++ + /* Parse action control including possible options. + * + * Parameters: +@@ -538,7 +546,7 @@ void parse_action_control_dflt(int *argc_p, char ***argv_p, + int *result_p, bool allow_num, + int default_result) + { +- if (parse_action_control(argc_p, argv_p, result_p, allow_num)) ++ if (__parse_action_control(argc_p, argv_p, result_p, allow_num, true)) + *result_p = default_result; + } + +-- +2.20.1 + diff --git a/SOURCES/0136-tc-util-Don-t-call-NEXT_ARG_FWD-in-__parse_action_co.patch b/SOURCES/0136-tc-util-Don-t-call-NEXT_ARG_FWD-in-__parse_action_co.patch new file mode 100644 index 0000000..1b806df --- /dev/null +++ b/SOURCES/0136-tc-util-Don-t-call-NEXT_ARG_FWD-in-__parse_action_co.patch @@ -0,0 +1,235 @@ +From 1a12c7c90330410171007ada7513247fda5a1c57 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 5 Jun 2019 13:11:03 +0200 +Subject: [PATCH] tc: util: Don't call NEXT_ARG_FWD() in + __parse_action_control() + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1714660 +Upstream Status: iproute2.git commit 3572e01a090a2 +Conflicts: context change mainly due to missing commit 35f2a7639dca4 + ("tc/actions: introduce support for jump action") + +commit 3572e01a090a298e2f4c4f796bad6639b652e031 +Author: Michal Privoznik +Date: Fri Dec 8 11:18:07 2017 +0100 + + tc: util: Don't call NEXT_ARG_FWD() in __parse_action_control() + + Not all callers want parse_action_control*() to advance the + arguments. For instance act_parse_police() does the argument + advancing itself. 
+ + Fixes: e67aba559581 ("tc: actions: add helpers to parse and print control actions") + Signed-off-by: Michal Privoznik +--- + tc/m_bpf.c | 1 + + tc/m_connmark.c | 1 + + tc/m_csum.c | 1 + + tc/m_gact.c | 12 ++++++------ + tc/m_ife.c | 1 + + tc/m_mirred.c | 4 +++- + tc/m_nat.c | 1 + + tc/m_pedit.c | 1 + + tc/m_sample.c | 1 + + tc/m_skbedit.c | 1 + + tc/m_skbmod.c | 1 + + tc/m_tunnel_key.c | 1 + + tc/m_vlan.c | 1 + + tc/tc_util.c | 1 - + 14 files changed, 20 insertions(+), 8 deletions(-) + +diff --git a/tc/m_bpf.c b/tc/m_bpf.c +index 57283030a35f5..c2bad5640707c 100644 +--- a/tc/m_bpf.c ++++ b/tc/m_bpf.c +@@ -125,6 +125,7 @@ opt_bpf: + + parse_action_control_dflt(&argc, &argv, &parm.action, + false, TC_ACT_PIPE); ++ NEXT_ARG_FWD(); + + if (argc) { + if (matches(*argv, "index") == 0) { +diff --git a/tc/m_connmark.c b/tc/m_connmark.c +index 37d7185415490..47c7a8c2b17e7 100644 +--- a/tc/m_connmark.c ++++ b/tc/m_connmark.c +@@ -82,6 +82,7 @@ parse_connmark(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, + } + + parse_action_control_dflt(&argc, &argv, &sel.action, false, TC_ACT_PIPE); ++ NEXT_ARG_FWD(); + + if (argc) { + if (matches(*argv, "index") == 0) { +diff --git a/tc/m_csum.c b/tc/m_csum.c +index 7b156734f64c5..e1352c0820f69 100644 +--- a/tc/m_csum.c ++++ b/tc/m_csum.c +@@ -124,6 +124,7 @@ parse_csum(struct action_util *a, int *argc_p, + } + + parse_action_control_dflt(&argc, &argv, &sel.action, false, TC_ACT_OK); ++ NEXT_ARG_FWD(); + + if (argc) { + if (matches(*argv, "index") == 0) { +diff --git a/tc/m_gact.c b/tc/m_gact.c +index 73346d4e9333b..dd9542a5cc644 100644 +--- a/tc/m_gact.c ++++ b/tc/m_gact.c +@@ -86,14 +86,13 @@ parse_gact(struct action_util *a, int *argc_p, char ***argv_p, + if (argc < 0) + return -1; + +- +- if (matches(*argv, "gact") == 0) { +- argc--; +- argv++; +- } else if (parse_action_control(&argc, &argv, &p.action, false) == -1) { +- usage(); ++ if (matches(*argv, "gact") != 0 && ++ parse_action_control(&argc, &argv, 
&p.action, false) == -1) { ++ usage(); /* does not return */ + } + ++ NEXT_ARG_FWD(); ++ + #ifdef CONFIG_GACT_PROB + if (argc > 0) { + if (matches(*argv, "random") == 0) { +@@ -113,6 +112,7 @@ parse_gact(struct action_util *a, int *argc_p, char ***argv_p, + if (parse_action_control(&argc, &argv, + &pp.paction, false) == -1) + usage(); ++ NEXT_ARG_FWD(); + if (get_u16(&pp.pval, *argv, 10)) { + fprintf(stderr, "Illegal probability val 0x%x\n", pp.pval); + return -1; +diff --git a/tc/m_ife.c b/tc/m_ife.c +index e3521e62c178c..54fad8f70e73a 100644 +--- a/tc/m_ife.c ++++ b/tc/m_ife.c +@@ -158,6 +158,7 @@ static int parse_ife(struct action_util *a, int *argc_p, char ***argv_p, + + parse_action_control_dflt(&argc, &argv, &p.action, false, TC_ACT_PIPE); + ++ NEXT_ARG_FWD(); + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); +diff --git a/tc/m_mirred.c b/tc/m_mirred.c +index 2384bda1ff045..b09b016c2ca39 100644 +--- a/tc/m_mirred.c ++++ b/tc/m_mirred.c +@@ -170,8 +170,10 @@ parse_direction(struct action_util *a, int *argc_p, char ***argv_p, + } + + +- if (p.eaction == TCA_EGRESS_MIRROR || p.eaction == TCA_INGRESS_MIRROR) ++ if (p.eaction == TCA_EGRESS_MIRROR || p.eaction == TCA_INGRESS_MIRROR) { + parse_action_control(&argc, &argv, &p.action, false); ++ NEXT_ARG_FWD(); ++ } + + if (argc) { + if (iok && matches(*argv, "index") == 0) { +diff --git a/tc/m_nat.c b/tc/m_nat.c +index 31b68fb6bd784..bb455f080b3a4 100644 +--- a/tc/m_nat.c ++++ b/tc/m_nat.c +@@ -117,6 +117,7 @@ parse_nat(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, struct + + parse_action_control_dflt(&argc, &argv, &sel.action, false, TC_ACT_OK); + ++ NEXT_ARG_FWD(); + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); +diff --git a/tc/m_pedit.c b/tc/m_pedit.c +index 5d89ab1d832ab..3391be95da38c 100644 +--- a/tc/m_pedit.c ++++ b/tc/m_pedit.c +@@ -673,6 +673,7 @@ int parse_pedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, + + 
parse_action_control_dflt(&argc, &argv, &sel.sel.action, false, TC_ACT_OK); + ++ NEXT_ARG_FWD(); + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); +diff --git a/tc/m_sample.c b/tc/m_sample.c +index ff5ee6bd1ef63..31774c0e806b4 100644 +--- a/tc/m_sample.c ++++ b/tc/m_sample.c +@@ -100,6 +100,7 @@ static int parse_sample(struct action_util *a, int *argc_p, char ***argv_p, + + parse_action_control_dflt(&argc, &argv, &p.action, false, TC_ACT_PIPE); + ++ NEXT_ARG_FWD(); + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); +diff --git a/tc/m_skbedit.c b/tc/m_skbedit.c +index aa374fcb33ed9..c41a7bb082dad 100644 +--- a/tc/m_skbedit.c ++++ b/tc/m_skbedit.c +@@ -123,6 +123,7 @@ parse_skbedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, + parse_action_control_dflt(&argc, &argv, &sel.action, + false, TC_ACT_PIPE); + ++ NEXT_ARG_FWD(); + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); +diff --git a/tc/m_skbmod.c b/tc/m_skbmod.c +index ba79308ba8354..00318d42642a5 100644 +--- a/tc/m_skbmod.c ++++ b/tc/m_skbmod.c +@@ -125,6 +125,7 @@ static int parse_skbmod(struct action_util *a, int *argc_p, char ***argv_p, + + parse_action_control_dflt(&argc, &argv, &p.action, false, TC_ACT_PIPE); + ++ NEXT_ARG_FWD(); + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); +diff --git a/tc/m_tunnel_key.c b/tc/m_tunnel_key.c +index cdde64a15b929..0ff3f1a2b9876 100644 +--- a/tc/m_tunnel_key.c ++++ b/tc/m_tunnel_key.c +@@ -197,6 +197,7 @@ static int parse_tunnel_key(struct action_util *a, int *argc_p, char ***argv_p, + parse_action_control_dflt(&argc, &argv, &parm.action, + false, TC_ACT_PIPE); + ++ NEXT_ARG_FWD(); + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); +diff --git a/tc/m_vlan.c b/tc/m_vlan.c +index cccb4996b05f3..0b2966ce82e53 100644 +--- a/tc/m_vlan.c ++++ b/tc/m_vlan.c +@@ -137,6 +137,7 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p, + 
parse_action_control_dflt(&argc, &argv, &parm.action, + false, TC_ACT_PIPE); + ++ NEXT_ARG_FWD(); + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); +diff --git a/tc/tc_util.c b/tc/tc_util.c +index cdc23477ada53..4584d4a448fb4 100644 +--- a/tc/tc_util.c ++++ b/tc/tc_util.c +@@ -507,7 +507,6 @@ static int __parse_action_control(int *argc_p, char ***argv_p, int *result_p, + } + result |= chain_index; + } +- NEXT_ARG_FWD(); + *argc_p = argc; + *argv_p = argv; + *result_p = result; +-- +2.20.1 + diff --git a/SOURCES/0137-tc-fix-parsing-of-the-control-action.patch b/SOURCES/0137-tc-fix-parsing-of-the-control-action.patch new file mode 100644 index 0000000..78b9286 --- /dev/null +++ b/SOURCES/0137-tc-fix-parsing-of-the-control-action.patch @@ -0,0 +1,321 @@ +From cb73324026eb3f9c315735b9020890f43eeaac43 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 5 Jun 2019 13:12:06 +0200 +Subject: [PATCH] tc: fix parsing of the control action + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1714660 +Upstream Status: iproute2.git commit 75ef7b18d2a13 +Conflicts: context change due to missing commit 35f2a7639dca4 + ("tc/actions: introduce support for jump action") + +commit 75ef7b18d2a13657056706895bf8d8dd3ac93e46 +Author: Davide Caratti +Date: Fri Mar 2 19:36:16 2018 +0100 + + tc: fix parsing of the control action + + If the user didn't specify any control action, don't pop the command line + arguments: otherwise, parsing of the next argument (typically the 'index' + keyword) results in an error, causing the following 'tc-testing' failures: + + Test a6d6: Add skbedit action with index + Test 38f3: Delete skbedit action + Test a568: Add action with ife type + Test b983: Add action without ife type + Test 7d50: Add skbmod action to set destination mac + Test 9b29: Add skbmod action to set source mac + Test e93a: Delete an skbmod action + + Also, add missing parse for 'ok' control action to m_police, to fix the + following 'tc-testing' failure: + + Test 
8dd5: Add police action with control ok + + tested with: + # ./tdc.py + + test results: + all tests ok using kernel 4.16-rc2, except 9aa8 "Get a single skbmod + action from a list" (which is failing also before this commit) + + Fixes: 3572e01a090a ("tc: util: Don't call NEXT_ARG_FWD() in __parse_action_control()") + Cc: Michal Privoznik + Cc: Wolfgang Bumiller + Signed-off-by: Davide Caratti + Signed-off-by: Stephen Hemminger +--- + tc/m_bpf.c | 1 - + tc/m_connmark.c | 1 - + tc/m_csum.c | 1 - + tc/m_gact.c | 9 +++------ + tc/m_ife.c | 1 - + tc/m_mirred.c | 5 ++--- + tc/m_nat.c | 1 - + tc/m_pedit.c | 1 - + tc/m_police.c | 16 ++++++++++------ + tc/m_sample.c | 1 - + tc/m_skbedit.c | 1 - + tc/m_skbmod.c | 1 - + tc/m_tunnel_key.c | 1 - + tc/m_vlan.c | 1 - + tc/tc_util.c | 6 +++++- + 15 files changed, 20 insertions(+), 27 deletions(-) + +diff --git a/tc/m_bpf.c b/tc/m_bpf.c +index c2bad5640707c..57283030a35f5 100644 +--- a/tc/m_bpf.c ++++ b/tc/m_bpf.c +@@ -125,7 +125,6 @@ opt_bpf: + + parse_action_control_dflt(&argc, &argv, &parm.action, + false, TC_ACT_PIPE); +- NEXT_ARG_FWD(); + + if (argc) { + if (matches(*argv, "index") == 0) { +diff --git a/tc/m_connmark.c b/tc/m_connmark.c +index 47c7a8c2b17e7..37d7185415490 100644 +--- a/tc/m_connmark.c ++++ b/tc/m_connmark.c +@@ -82,7 +82,6 @@ parse_connmark(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, + } + + parse_action_control_dflt(&argc, &argv, &sel.action, false, TC_ACT_PIPE); +- NEXT_ARG_FWD(); + + if (argc) { + if (matches(*argv, "index") == 0) { +diff --git a/tc/m_csum.c b/tc/m_csum.c +index e1352c0820f69..7b156734f64c5 100644 +--- a/tc/m_csum.c ++++ b/tc/m_csum.c +@@ -124,7 +124,6 @@ parse_csum(struct action_util *a, int *argc_p, + } + + parse_action_control_dflt(&argc, &argv, &sel.action, false, TC_ACT_OK); +- NEXT_ARG_FWD(); + + if (argc) { + if (matches(*argv, "index") == 0) { +diff --git a/tc/m_gact.c b/tc/m_gact.c +index dd9542a5cc644..45eecf7ea1647 100644 +--- a/tc/m_gact.c ++++ b/tc/m_gact.c 
+@@ -86,12 +86,10 @@ parse_gact(struct action_util *a, int *argc_p, char ***argv_p, + if (argc < 0) + return -1; + +- if (matches(*argv, "gact") != 0 && +- parse_action_control(&argc, &argv, &p.action, false) == -1) { ++ if (!matches(*argv, "gact")) ++ NEXT_ARG_FWD(); ++ if (parse_action_control(&argc, &argv, &p.action, false)) + usage(); /* does not return */ +- } +- +- NEXT_ARG_FWD(); + + #ifdef CONFIG_GACT_PROB + if (argc > 0) { +@@ -112,7 +110,6 @@ parse_gact(struct action_util *a, int *argc_p, char ***argv_p, + if (parse_action_control(&argc, &argv, + &pp.paction, false) == -1) + usage(); +- NEXT_ARG_FWD(); + if (get_u16(&pp.pval, *argv, 10)) { + fprintf(stderr, "Illegal probability val 0x%x\n", pp.pval); + return -1; +diff --git a/tc/m_ife.c b/tc/m_ife.c +index 54fad8f70e73a..e3521e62c178c 100644 +--- a/tc/m_ife.c ++++ b/tc/m_ife.c +@@ -158,7 +158,6 @@ static int parse_ife(struct action_util *a, int *argc_p, char ***argv_p, + + parse_action_control_dflt(&argc, &argv, &p.action, false, TC_ACT_PIPE); + +- NEXT_ARG_FWD(); + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); +diff --git a/tc/m_mirred.c b/tc/m_mirred.c +index b09b016c2ca39..b1f45f1e6ecb0 100644 +--- a/tc/m_mirred.c ++++ b/tc/m_mirred.c +@@ -76,6 +76,7 @@ parse_direction(struct action_util *a, int *argc_p, char ***argv_p, + while (argc > 0) { + + if (matches(*argv, "action") == 0) { ++ NEXT_ARG(); + break; + } else if (!egress && matches(*argv, "egress") == 0) { + egress = 1; +@@ -170,10 +171,8 @@ parse_direction(struct action_util *a, int *argc_p, char ***argv_p, + } + + +- if (p.eaction == TCA_EGRESS_MIRROR || p.eaction == TCA_INGRESS_MIRROR) { ++ if (p.eaction == TCA_EGRESS_MIRROR || p.eaction == TCA_INGRESS_MIRROR) + parse_action_control(&argc, &argv, &p.action, false); +- NEXT_ARG_FWD(); +- } + + if (argc) { + if (iok && matches(*argv, "index") == 0) { +diff --git a/tc/m_nat.c b/tc/m_nat.c +index bb455f080b3a4..31b68fb6bd784 100644 +--- a/tc/m_nat.c ++++ b/tc/m_nat.c +@@ -117,7 
+117,6 @@ parse_nat(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, struct + + parse_action_control_dflt(&argc, &argv, &sel.action, false, TC_ACT_OK); + +- NEXT_ARG_FWD(); + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); +diff --git a/tc/m_pedit.c b/tc/m_pedit.c +index 3391be95da38c..5d89ab1d832ab 100644 +--- a/tc/m_pedit.c ++++ b/tc/m_pedit.c +@@ -673,7 +673,6 @@ int parse_pedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, + + parse_action_control_dflt(&argc, &argv, &sel.sel.action, false, TC_ACT_OK); + +- NEXT_ARG_FWD(); + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); +diff --git a/tc/m_police.c b/tc/m_police.c +index 86117db0482ec..b79545961f4d7 100644 +--- a/tc/m_police.c ++++ b/tc/m_police.c +@@ -151,15 +151,18 @@ int act_parse_police(struct action_util *a, int *argc_p, char ***argv_p, + matches(*argv, "shot") == 0 || + matches(*argv, "continue") == 0 || + matches(*argv, "pass") == 0 || ++ matches(*argv, "ok") == 0 || + matches(*argv, "pipe") == 0 || + matches(*argv, "goto") == 0) { +- if (parse_action_control(&argc, &argv, &p.action, false)) +- return -1; ++ if (!parse_action_control(&argc, &argv, &p.action, false)) ++ goto action_ctrl_ok; ++ return -1; + } else if (strcmp(*argv, "conform-exceed") == 0) { + NEXT_ARG(); +- if (parse_action_control_slash(&argc, &argv, &p.action, +- &presult, true)) +- return -1; ++ if (!parse_action_control_slash(&argc, &argv, &p.action, ++ &presult, true)) ++ goto action_ctrl_ok; ++ return -1; + } else if (matches(*argv, "overhead") == 0) { + NEXT_ARG(); + if (get_u16(&overhead, *argv, 10)) { +@@ -175,8 +178,9 @@ int act_parse_police(struct action_util *a, int *argc_p, char ***argv_p, + } else { + break; + } ++ NEXT_ARG_FWD(); ++action_ctrl_ok: + ok++; +- argc--; argv++; + } + + if (!ok) +diff --git a/tc/m_sample.c b/tc/m_sample.c +index 31774c0e806b4..ff5ee6bd1ef63 100644 +--- a/tc/m_sample.c ++++ b/tc/m_sample.c +@@ -100,7 +100,6 @@ static int 
parse_sample(struct action_util *a, int *argc_p, char ***argv_p, + + parse_action_control_dflt(&argc, &argv, &p.action, false, TC_ACT_PIPE); + +- NEXT_ARG_FWD(); + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); +diff --git a/tc/m_skbedit.c b/tc/m_skbedit.c +index c41a7bb082dad..aa374fcb33ed9 100644 +--- a/tc/m_skbedit.c ++++ b/tc/m_skbedit.c +@@ -123,7 +123,6 @@ parse_skbedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, + parse_action_control_dflt(&argc, &argv, &sel.action, + false, TC_ACT_PIPE); + +- NEXT_ARG_FWD(); + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); +diff --git a/tc/m_skbmod.c b/tc/m_skbmod.c +index 00318d42642a5..ba79308ba8354 100644 +--- a/tc/m_skbmod.c ++++ b/tc/m_skbmod.c +@@ -125,7 +125,6 @@ static int parse_skbmod(struct action_util *a, int *argc_p, char ***argv_p, + + parse_action_control_dflt(&argc, &argv, &p.action, false, TC_ACT_PIPE); + +- NEXT_ARG_FWD(); + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); +diff --git a/tc/m_tunnel_key.c b/tc/m_tunnel_key.c +index 0ff3f1a2b9876..cdde64a15b929 100644 +--- a/tc/m_tunnel_key.c ++++ b/tc/m_tunnel_key.c +@@ -197,7 +197,6 @@ static int parse_tunnel_key(struct action_util *a, int *argc_p, char ***argv_p, + parse_action_control_dflt(&argc, &argv, &parm.action, + false, TC_ACT_PIPE); + +- NEXT_ARG_FWD(); + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); +diff --git a/tc/m_vlan.c b/tc/m_vlan.c +index 0b2966ce82e53..cccb4996b05f3 100644 +--- a/tc/m_vlan.c ++++ b/tc/m_vlan.c +@@ -137,7 +137,6 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p, + parse_action_control_dflt(&argc, &argv, &parm.action, + false, TC_ACT_PIPE); + +- NEXT_ARG_FWD(); + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); +diff --git a/tc/tc_util.c b/tc/tc_util.c +index 4584d4a448fb4..65695ea592ed8 100644 +--- a/tc/tc_util.c ++++ b/tc/tc_util.c +@@ -507,6 +507,7 @@ static int __parse_action_control(int 
*argc_p, char ***argv_p, int *result_p, + } + result |= chain_index; + } ++ NEXT_ARG_FWD(); + *argc_p = argc; + *argv_p = argv; + *result_p = result; +@@ -603,8 +604,8 @@ out: + int parse_action_control_slash(int *argc_p, char ***argv_p, + int *result1_p, int *result2_p, bool allow_num) + { ++ int result1, result2, argc = *argc_p; + char **argv = *argv_p; +- int result1, result2; + char *p = strchr(*argv, '/'); + + if (!p) +@@ -624,6 +625,9 @@ int parse_action_control_slash(int *argc_p, char ***argv_p, + + *result1_p = result1; + *result2_p = result2; ++ NEXT_ARG_FWD(); ++ *argc_p = argc; ++ *argv_p = argv; + return 0; + } + +-- +2.20.1 + diff --git a/SOURCES/0138-m_mirred-don-t-bail-if-the-control-action-is-missing.patch b/SOURCES/0138-m_mirred-don-t-bail-if-the-control-action-is-missing.patch new file mode 100644 index 0000000..f87d77e --- /dev/null +++ b/SOURCES/0138-m_mirred-don-t-bail-if-the-control-action-is-missing.patch @@ -0,0 +1,46 @@ +From e4526cbbfb6151e87e493a7fecfe2384a3751100 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 5 Jun 2019 13:12:49 +0200 +Subject: [PATCH] m_mirred: don't bail if the control action is missing + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1714660 +Upstream Status: iproute2.git commit 6eccf7ecdb010 + +commit 6eccf7ecdb010a90e5271942748ef4338ddb61ae +Author: Paolo Abeni +Date: Mon May 20 11:56:52 2019 +0200 + + m_mirred: don't bail if the control action is missing + + The mirred act admits an optional control action, defaulting + to TC_ACT_PIPE. The parsing code currently emits an error message + if the control action is not provided on the command line, even + if the command itself completes with no error. + + This change shuts down the error message, using the appropriate + parsing helper. 
+ + Fixes: e67aba559581 ("tc: actions: add helpers to parse and print control actions") + Signed-off-by: Paolo Abeni + Signed-off-by: Stephen Hemminger +--- + tc/m_mirred.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tc/m_mirred.c b/tc/m_mirred.c +index b1f45f1e6ecb0..90d0b633c1318 100644 +--- a/tc/m_mirred.c ++++ b/tc/m_mirred.c +@@ -172,7 +172,8 @@ parse_direction(struct action_util *a, int *argc_p, char ***argv_p, + + + if (p.eaction == TCA_EGRESS_MIRROR || p.eaction == TCA_INGRESS_MIRROR) +- parse_action_control(&argc, &argv, &p.action, false); ++ parse_action_control_dflt(&argc, &argv, &p.action, false, ++ TC_ACT_PIPE); + + if (argc) { + if (iok && matches(*argv, "index") == 0) { +-- +2.20.1 + diff --git a/SOURCES/0139-tc-m_tunnel_key-add-csum-nocsum-option.patch b/SOURCES/0139-tc-m_tunnel_key-add-csum-nocsum-option.patch new file mode 100644 index 0000000..3aa571a --- /dev/null +++ b/SOURCES/0139-tc-m_tunnel_key-add-csum-nocsum-option.patch @@ -0,0 +1,131 @@ +From 0152070641c58eccf6c6d9981a33f17ada23996f Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 5 Jun 2019 13:12:49 +0200 +Subject: [PATCH] tc: m_tunnel_key: add csum/nocsum option + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1714660 +Upstream Status: iproute2.git commit 59eb271d1d259 +Conflicts: context change due to out-of-order cherry-pick of + commit 9f89b0cc0eda2 ("tc/act_tunnel_key: Enable + setup of tos and ttl") + +commit 59eb271d1d259da21372d222a2d995e57ef648a9 +Author: Jiri Benc +Date: Wed Jun 14 21:30:18 2017 +0200 + + tc: m_tunnel_key: add csum/nocsum option + + Allows control of UDP zero checksum. 
+ + Signed-off-by: Jiri Benc +--- + man/man8/tc-tunnel_key.8 | 18 ++++++++++++++++++ + tc/m_tunnel_key.c | 21 ++++++++++++++++++++- + 2 files changed, 38 insertions(+), 1 deletion(-) + +diff --git a/man/man8/tc-tunnel_key.8 b/man/man8/tc-tunnel_key.8 +index 5e93c59d49465..0cd792a66d185 100644 +--- a/man/man8/tc-tunnel_key.8 ++++ b/man/man8/tc-tunnel_key.8 +@@ -18,6 +18,7 @@ tunnel_key - Tunnel metadata manipulation + .BI dst_port " UDP_PORT" + .BI tos " TOS" + .BI ttl " TTL" ++.RB "[ " csum " | " nocsum " ]" + + .SH DESCRIPTION + The +@@ -85,6 +86,23 @@ Outer header TOS + .TP + .B ttl + Outer header TTL ++.TP ++.RB [ no ] csum ++Controlls outer UDP checksum. When set to ++.B csum ++(which is default), the outer UDP checksum is calculated and included in the ++packets. When set to ++.BR nocsum , ++outer UDP checksum is zero. Note that when using zero UDP checksums with ++IPv6, the other tunnel endpoint must be configured to accept such packets. ++In Linux, this would be the ++.B udp6zerocsumrx ++option for the VXLAN tunnel interface. ++.IP ++If using ++.B nocsum ++with IPv6, be sure you know what you are doing. Zero UDP checksums provide ++weaker protection against corrupted packets. See RFC6935 for details. 
+ .RE + .SH EXAMPLES + The following example encapsulates incoming ICMP packets on eth0 into a vxlan +diff --git a/tc/m_tunnel_key.c b/tc/m_tunnel_key.c +index cdde64a15b929..992adc51c28ab 100644 +--- a/tc/m_tunnel_key.c ++++ b/tc/m_tunnel_key.c +@@ -28,7 +28,8 @@ static void explain(void) + "id \n" + "src_ip (mandatory)\n" + "dst_ip (mandatory)\n" +- "dst_port \n"); ++ "dst_port \n" ++ "csum | nocsum (default is \"csum\")\n"); + } + + static void usage(void) +@@ -107,6 +108,7 @@ static int parse_tunnel_key(struct action_util *a, int *argc_p, char ***argv_p, + int ret; + int has_src_ip = 0; + int has_dst_ip = 0; ++ int csum = 1; + + if (matches(*argv, "tunnel_key") != 0) + return -1; +@@ -186,6 +188,10 @@ static int parse_tunnel_key(struct action_util *a, int *argc_p, char ***argv_p, + fprintf(stderr, "Illegal \"ttl\"\n"); + return -1; + } ++ } else if (matches(*argv, "csum") == 0) { ++ csum = 1; ++ } else if (matches(*argv, "nocsum") == 0) { ++ csum = 0; + } else if (matches(*argv, "help") == 0) { + usage(); + } else { +@@ -194,6 +200,8 @@ static int parse_tunnel_key(struct action_util *a, int *argc_p, char ***argv_p, + NEXT_ARG_FWD(); + } + ++ addattr8(n, MAX_MSG, TCA_TUNNEL_KEY_NO_CSUM, !csum); ++ + parse_action_control_dflt(&argc, &argv, &parm.action, + false, TC_ACT_PIPE); + +@@ -276,6 +284,15 @@ static void tunnel_key_print_tos_ttl(FILE *f, char *name, + } + } + ++static void tunnel_key_print_flag(FILE *f, const char *name_on, ++ const char *name_off, ++ struct rtattr *attr) ++{ ++ if (!attr) ++ return; ++ fprintf(f, "\n\t%s", rta_getattr_u8(attr) ? 
name_on : name_off); ++} ++ + static int print_tunnel_key(struct action_util *au, FILE *f, struct rtattr *arg) + { + struct rtattr *tb[TCA_TUNNEL_KEY_MAX + 1]; +@@ -312,6 +329,8 @@ static int print_tunnel_key(struct action_util *au, FILE *f, struct rtattr *arg) + tb[TCA_TUNNEL_KEY_ENC_KEY_ID]); + tunnel_key_print_dst_port(f, "dst_port", + tb[TCA_TUNNEL_KEY_ENC_DST_PORT]); ++ tunnel_key_print_flag(f, "nocsum", "csum", ++ tb[TCA_TUNNEL_KEY_NO_CSUM]); + tunnel_key_print_tos_ttl(f, "tos", + tb[TCA_TUNNEL_KEY_ENC_TOS]); + tunnel_key_print_tos_ttl(f, "ttl", +-- +2.20.1 + diff --git a/SOURCES/0140-gre6-add-collect-metadata-support.patch b/SOURCES/0140-gre6-add-collect-metadata-support.patch new file mode 100644 index 0000000..38ec1d7 --- /dev/null +++ b/SOURCES/0140-gre6-add-collect-metadata-support.patch @@ -0,0 +1,160 @@ +From b9961cdb54c22fa1b3f1eac5446a008fde7532e6 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 5 Jun 2019 13:13:31 +0200 +Subject: [PATCH] gre6: add collect metadata support + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1714660 +Upstream Status: iproute2.git commit 6231c5bec6d25 +Conflicts: +* Context change due to missing commit + ad4b1425c3182 ("iplink: Expose IFLA_*_FWMARK attributes for supported link types") +* Adjusted gre_print_opt() to missing commit 6856fb65484ba + ("ip: link_gre6.c: add json output support") + +commit 6231c5bec6d256f7861f39d3a578f5259f274cc4 +Author: William Tu +Date: Tue Dec 12 18:22:52 2017 -0800 + + gre6: add collect metadata support + + The patch adds 'external' option to support collect metadata + gre6 tunnel. The 'external' keyword is already used to set the + device into collect metadata mode such as vxlan, geneve, ipip, + etc. This patch extends support for ipv6 gre and gretap. 
+ Example of L3 and L2 gre device: + bash:~# ip link add dev ip6gre123 type ip6gre external + bash:~# ip link add dev ip6gretap123 type ip6gretap external + + Signed-off-by: William Tu + Cc: Daniel Borkmann +--- + ip/link_gre6.c | 49 ++++++++++++++++++++++++++++--------------- + man/man8/ip-link.8.in | 17 +++++++++++++++ + 2 files changed, 49 insertions(+), 17 deletions(-) + +diff --git a/ip/link_gre6.c b/ip/link_gre6.c +index 127e51de4ab73..ea42fb1a9f664 100644 +--- a/ip/link_gre6.c ++++ b/ip/link_gre6.c +@@ -102,6 +102,7 @@ static int gre_parse_opt(struct link_util *lu, int argc, char **argv, + __u16 encapflags = TUNNEL_ENCAP_FLAG_CSUM6; + __u16 encapsport = 0; + __u16 encapdport = 0; ++ __u8 metadata = 0; + int len; + + if (!(n->nlmsg_flags & NLM_F_CREATE)) { +@@ -173,6 +174,9 @@ get_failed: + if (greinfo[IFLA_GRE_ENCAP_SPORT]) + encapsport = rta_getattr_u16(greinfo[IFLA_GRE_ENCAP_SPORT]); + ++ if (greinfo[IFLA_GRE_COLLECT_METADATA]) ++ metadata = 1; ++ + if (greinfo[IFLA_GRE_ENCAP_DPORT]) + encapdport = rta_getattr_u16(greinfo[IFLA_GRE_ENCAP_DPORT]); + +@@ -333,6 +337,8 @@ get_failed: + encapflags |= TUNNEL_ENCAP_FLAG_REMCSUM; + } else if (strcmp(*argv, "noencap-remcsum") == 0) { + encapflags &= ~TUNNEL_ENCAP_FLAG_REMCSUM; ++ } else if (strcmp(*argv, "external") == 0) { ++ metadata = 1; + } else if (strcmp(*argv, "encaplimit") == 0) { + NEXT_ARG(); + if (strcmp(*argv, "none") == 0) { +@@ -350,23 +356,27 @@ get_failed: + argc--; argv++; + } + +- addattr32(n, 1024, IFLA_GRE_IKEY, ikey); +- addattr32(n, 1024, IFLA_GRE_OKEY, okey); +- addattr_l(n, 1024, IFLA_GRE_IFLAGS, &iflags, 2); +- addattr_l(n, 1024, IFLA_GRE_OFLAGS, &oflags, 2); +- addattr_l(n, 1024, IFLA_GRE_LOCAL, &laddr, sizeof(laddr)); +- addattr_l(n, 1024, IFLA_GRE_REMOTE, &raddr, sizeof(raddr)); +- if (link) +- addattr32(n, 1024, IFLA_GRE_LINK, link); +- addattr_l(n, 1024, IFLA_GRE_TTL, &hop_limit, 1); +- addattr_l(n, 1024, IFLA_GRE_ENCAP_LIMIT, &encap_limit, 1); +- addattr_l(n, 1024, IFLA_GRE_FLOWINFO, 
&flowinfo, 4); +- addattr32(n, 1024, IFLA_GRE_FLAGS, flags); +- +- addattr16(n, 1024, IFLA_GRE_ENCAP_TYPE, encaptype); +- addattr16(n, 1024, IFLA_GRE_ENCAP_FLAGS, encapflags); +- addattr16(n, 1024, IFLA_GRE_ENCAP_SPORT, htons(encapsport)); +- addattr16(n, 1024, IFLA_GRE_ENCAP_DPORT, htons(encapdport)); ++ if (!metadata) { ++ addattr32(n, 1024, IFLA_GRE_IKEY, ikey); ++ addattr32(n, 1024, IFLA_GRE_OKEY, okey); ++ addattr_l(n, 1024, IFLA_GRE_IFLAGS, &iflags, 2); ++ addattr_l(n, 1024, IFLA_GRE_OFLAGS, &oflags, 2); ++ addattr_l(n, 1024, IFLA_GRE_LOCAL, &laddr, sizeof(laddr)); ++ addattr_l(n, 1024, IFLA_GRE_REMOTE, &raddr, sizeof(raddr)); ++ if (link) ++ addattr32(n, 1024, IFLA_GRE_LINK, link); ++ addattr_l(n, 1024, IFLA_GRE_TTL, &hop_limit, 1); ++ addattr_l(n, 1024, IFLA_GRE_ENCAP_LIMIT, &encap_limit, 1); ++ addattr_l(n, 1024, IFLA_GRE_FLOWINFO, &flowinfo, 4); ++ addattr32(n, 1024, IFLA_GRE_FLAGS, flags); ++ ++ addattr16(n, 1024, IFLA_GRE_ENCAP_TYPE, encaptype); ++ addattr16(n, 1024, IFLA_GRE_ENCAP_FLAGS, encapflags); ++ addattr16(n, 1024, IFLA_GRE_ENCAP_SPORT, htons(encapsport)); ++ addattr16(n, 1024, IFLA_GRE_ENCAP_DPORT, htons(encapdport)); ++ } else { ++ addattr_l(n, 1024, IFLA_GRE_COLLECT_METADATA, NULL, 0); ++ } + + return 0; + } +@@ -385,6 +395,11 @@ static void gre_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) + if (!tb) + return; + ++ if (tb[IFLA_GRE_COLLECT_METADATA]) { ++ fprintf(f, "external"); ++ return; ++ } ++ + if (tb[IFLA_GRE_FLAGS]) + flags = rta_getattr_u32(tb[IFLA_GRE_FLAGS]); + +diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in +index 8be5d5e1e9fd6..612bd8ce92696 100644 +--- a/man/man8/ip-link.8.in ++++ b/man/man8/ip-link.8.in +@@ -877,6 +877,8 @@ the following additional arguments are supported: + .BI "dscp inherit" + ] [ + .BI dev " PHYS_DEV " ++] [ ++.RB external + ] + + .in +8 +@@ -958,6 +960,21 @@ or + .IR 00 ".." ff + when tunneling non-IP packets. The default value is 00. 
+ ++.sp ++.RB external ++- make this tunnel externally controlled (or not, which is the default). ++In the kernel, this is referred to as collect metadata mode. This flag is ++mutually exclusive with the ++.BR remote , ++.BR local , ++.BR seq , ++.BR key, ++.BR csum, ++.BR hoplimit, ++.BR encaplimit, ++.BR flowlabel " and " tclass ++options. ++ + .in -8 + + .TP +-- +2.20.1 + diff --git a/SOURCES/0141-tc_util-Silence-spurious-compiler-warning.patch b/SOURCES/0141-tc_util-Silence-spurious-compiler-warning.patch new file mode 100644 index 0000000..b345515 --- /dev/null +++ b/SOURCES/0141-tc_util-Silence-spurious-compiler-warning.patch @@ -0,0 +1,40 @@ +From 774b1c35d4515434e979d9090960ad3293bfe12e Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 5 Jun 2019 13:18:27 +0200 +Subject: [PATCH] tc_util: Silence spurious compiler warning + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1714660 +Upstream Status: iproute2.git commit 66942e522e54d + +commit 66942e522e54d9f96153590b7c1c7830b8f73f5c +Author: Phil Sutter +Date: Wed Nov 15 15:01:31 2017 +0100 + + tc_util: Silence spurious compiler warning + + GCC version 7.2.1 complains that 'result1' may be used uninitialized in + parse_action_control_slash_spaces(). This should not be possible in + practice, so the actual value 'result1' is initialized with does not + matter. 
+ + Signed-off-by: Phil Sutter +--- + tc/tc_util.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tc/tc_util.c b/tc/tc_util.c +index 65695ea592ed8..e115e5a70e3a1 100644 +--- a/tc/tc_util.c ++++ b/tc/tc_util.c +@@ -556,7 +556,7 @@ static int parse_action_control_slash_spaces(int *argc_p, char ***argv_p, + { + int argc = *argc_p; + char **argv = *argv_p; +- int result1, result2; ++ int result1 = -1, result2; + int *result_p = &result1; + int ok = 0; + int ret; +-- +2.20.1 + diff --git a/SOURCES/0142-ss-use-for-any-address-any-family-sockets.patch b/SOURCES/0142-ss-use-for-any-address-any-family-sockets.patch new file mode 100644 index 0000000..8e97133 --- /dev/null +++ b/SOURCES/0142-ss-use-for-any-address-any-family-sockets.patch @@ -0,0 +1,76 @@ +From 04453080a3a92071227fd79a039137f72fa82e15 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 12 Jun 2019 15:00:33 +0200 +Subject: [PATCH] ss: use [::] for any address/any family sockets + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1588122 +Upstream Status: RHEL-only + +commit d981824803999a339f4b8fb9ad36d9d5990d9eab +Author: Andrea Claudi +Date: Wed Jun 12 14:49:07 2019 +0200 + + ss: use [::] for any address/any family sockets + + commit aba9c23a6e1cb ("ss: enclose IPv6 address in brackets") + brings in the unintended side effect of showing as "*" sockets + listening to any address in any family. This is consistent with + upstream iproute and RHEL 8 iproute version, but not with + previous versions of RHEL 7 iproute. + + This commit partially reverts aba9c23a6e1cb using "[::]" for + any family sockets when -f inet6 is used. 
+ + Tested with + # ss -ln -f inet6 + + ("ss: enclose IPv6 address in brackets") +--- + misc/ss.c | 29 ++++++++++++----------------- + 1 file changed, 12 insertions(+), 17 deletions(-) + +diff --git a/misc/ss.c b/misc/ss.c +index 6aaae1b5390e4..8f184fb929d31 100644 +--- a/misc/ss.c ++++ b/misc/ss.c +@@ -1090,25 +1090,20 @@ static void inet_addr_print(const inet_prefix *a, int port, unsigned int ifindex + ap = format_host(AF_INET, 4, a->data); + } + } else { +- if (!memcmp(a->data, &in6addr_any, sizeof(in6addr_any))) { +- buf[0] = '*'; +- buf[1] = 0; +- } else { +- ap = format_host(a->family, 16, a->data); +- +- /* Numeric IPv6 addresses should be bracketed */ +- if (strchr(ap, ':')) { +- snprintf(buf, sizeof(buf), +- "[%s]", ap); +- ap = buf; +- } ++ ap = format_host(a->family, 16, a->data); + +- est_len = strlen(ap); +- if (est_len <= addr_width) +- est_len = addr_width; +- else +- est_len = addr_width + ((est_len-addr_width+3)/4)*4; ++ /* Numeric IPv6 addresses should be bracketed */ ++ if (strchr(ap, ':')) { ++ snprintf(buf, sizeof(buf), ++ "[%s]", ap); ++ ap = buf; + } ++ ++ est_len = strlen(ap); ++ if (est_len <= addr_width) ++ est_len = addr_width; ++ else ++ est_len = addr_width + ((est_len-addr_width+3)/4)*4; + } + + if (ifindex) { +-- +2.20.1 + diff --git a/SOURCES/0143-tc-introduce-tc_qdisc_block_exists-helper.patch b/SOURCES/0143-tc-introduce-tc_qdisc_block_exists-helper.patch new file mode 100644 index 0000000..df5bbd6 --- /dev/null +++ b/SOURCES/0143-tc-introduce-tc_qdisc_block_exists-helper.patch @@ -0,0 +1,110 @@ +From 2e2ac620670997b59d65a73b0af3e77431be3c18 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Tue, 18 Jun 2019 20:01:45 +0200 +Subject: [PATCH] tc: introduce tc_qdisc_block_exists helper + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1721291 +Upstream Status: iproute2.git commit d0bcedd549566 +Conflicts: context change due to missing commit 6f7df6b2a1fef + ("tc: Optimize gact action lookup") + +commit 
d0bcedd549566a87354aa804df3be6be80681ee9 +Author: Jiri Pirko +Date: Sat Jan 20 11:00:27 2018 +0100 + + tc: introduce tc_qdisc_block_exists helper + + This hepler used qdisc dump to list all qdisc and find if block index in + question is used by any of them. That means the block with specified + index exists. + + Signed-off-by: Jiri Pirko + Signed-off-by: David Ahern +--- + tc/tc_qdisc.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++ + tc/tc_util.h | 2 ++ + 2 files changed, 63 insertions(+) + +diff --git a/tc/tc_qdisc.c b/tc/tc_qdisc.c +index 8b0c5c72dbad1..f8e06ccf205a0 100644 +--- a/tc/tc_qdisc.c ++++ b/tc/tc_qdisc.c +@@ -366,3 +366,64 @@ int do_qdisc(int argc, char **argv) + fprintf(stderr, "Command \"%s\" is unknown, try \"tc qdisc help\".\n", *argv); + return -1; + } ++ ++struct tc_qdisc_block_exists_ctx { ++ __u32 block_index; ++ bool found; ++}; ++ ++static int tc_qdisc_block_exists_cb(const struct sockaddr_nl *who, ++ struct nlmsghdr *n, void *arg) ++{ ++ struct tc_qdisc_block_exists_ctx *ctx = arg; ++ struct tcmsg *t = NLMSG_DATA(n); ++ struct rtattr *tb[TCA_MAX+1]; ++ int len = n->nlmsg_len; ++ ++ if (n->nlmsg_type != RTM_NEWQDISC) ++ return 0; ++ ++ len -= NLMSG_LENGTH(sizeof(*t)); ++ if (len < 0) ++ return -1; ++ ++ parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len); ++ ++ if (tb[TCA_KIND] == NULL) ++ return -1; ++ ++ if (tb[TCA_INGRESS_BLOCK] && ++ RTA_PAYLOAD(tb[TCA_INGRESS_BLOCK]) >= sizeof(__u32)) { ++ __u32 block = rta_getattr_u32(tb[TCA_INGRESS_BLOCK]); ++ ++ if (block == ctx->block_index) ++ ctx->found = true; ++ } ++ ++ if (tb[TCA_EGRESS_BLOCK] && ++ RTA_PAYLOAD(tb[TCA_EGRESS_BLOCK]) >= sizeof(__u32)) { ++ __u32 block = rta_getattr_u32(tb[TCA_EGRESS_BLOCK]); ++ ++ if (block == ctx->block_index) ++ ctx->found = true; ++ } ++ return 0; ++} ++ ++bool tc_qdisc_block_exists(__u32 block_index) ++{ ++ struct tc_qdisc_block_exists_ctx ctx = { .block_index = block_index }; ++ struct tcmsg t = { .tcm_family = AF_UNSPEC }; ++ ++ if 
(rtnl_dump_request(&rth, RTM_GETQDISC, &t, sizeof(t)) < 0) { ++ perror("Cannot send dump request"); ++ return false; ++ } ++ ++ if (rtnl_dump_filter(&rth, tc_qdisc_block_exists_cb, &ctx) < 0) { ++ perror("Dump terminated\n"); ++ return false; ++ } ++ ++ return ctx.found; ++} +diff --git a/tc/tc_util.h b/tc/tc_util.h +index 5c54ad384eae6..8344c11833ee8 100644 +--- a/tc/tc_util.h ++++ b/tc/tc_util.h +@@ -122,4 +122,6 @@ int prio_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt); + int cls_names_init(char *path); + void cls_names_uninit(void); + ++bool tc_qdisc_block_exists(__u32 block_index); ++ + #endif +-- +2.20.1 + diff --git a/SOURCES/0144-tc_filter-resolve-device-name-before-parsing-filter.patch b/SOURCES/0144-tc_filter-resolve-device-name-before-parsing-filter.patch new file mode 100644 index 0000000..66c8eaa --- /dev/null +++ b/SOURCES/0144-tc_filter-resolve-device-name-before-parsing-filter.patch @@ -0,0 +1,113 @@ +From 83b78ff645260a51ff5d643169009faeb3032d3c Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Tue, 18 Jun 2019 20:02:54 +0200 +Subject: [PATCH] tc_filter: resolve device name before parsing filter + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1721291 +Upstream Status: iproute2.git commit 01ea76b1cf545 + +commit 01ea76b1cf54516c71a9a54fba672410ada2cccb +Author: Jakub Kicinski +Date: Thu Nov 23 18:12:06 2017 -0800 + + tc_filter: resolve device name before parsing filter + + Move resolving device name into an ifindex before calling filter + specific callbacks. This way if filters need the ifindex, they + can read it from the request. 
+ + Signed-off-by: Jakub Kicinski + Reviewed-by: Quentin Monnet + Acked-by: Daniel Borkmann +--- + tc/tc_filter.c | 50 ++++++++++++++++++++++++-------------------------- + 1 file changed, 24 insertions(+), 26 deletions(-) + +diff --git a/tc/tc_filter.c b/tc/tc_filter.c +index 8dbebf1ffa32a..e479039159df6 100644 +--- a/tc/tc_filter.c ++++ b/tc/tc_filter.c +@@ -161,6 +161,16 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv) + if (k[0]) + addattr_l(&req.n, sizeof(req), TCA_KIND, k, strlen(k)+1); + ++ if (d[0]) { ++ ll_init_map(&rth); ++ ++ req.t.tcm_ifindex = ll_name_to_index(d); ++ if (req.t.tcm_ifindex == 0) { ++ fprintf(stderr, "Cannot find device \"%s\"\n", d); ++ return 1; ++ } ++ } ++ + if (q) { + if (q->parse_fopt(q, fhandle, argc, argv, &req.n)) + return 1; +@@ -183,17 +193,6 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv) + if (est.ewma_log) + addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est)); + +- +- if (d[0]) { +- ll_init_map(&rth); +- +- req.t.tcm_ifindex = ll_name_to_index(d); +- if (req.t.tcm_ifindex == 0) { +- fprintf(stderr, "Cannot find device \"%s\"\n", d); +- return 1; +- } +- } +- + if (rtnl_talk(&rth, &req.n, NULL) < 0) { + fprintf(stderr, "We have an error talking to the kernel\n"); + return 2; +@@ -453,10 +452,23 @@ static int tc_filter_get(int cmd, unsigned int flags, int argc, char **argv) + return -1; + } + ++ if (d[0]) { ++ ll_init_map(&rth); ++ ++ req.t.tcm_ifindex = ll_name_to_index(d); ++ if (req.t.tcm_ifindex == 0) { ++ fprintf(stderr, "Cannot find device \"%s\"\n", d); ++ return 1; ++ } ++ filter_ifindex = req.t.tcm_ifindex; ++ } else { ++ fprintf(stderr, "Must specify netdevice \"dev\"\n"); ++ return -1; ++ } ++ + if (q->parse_fopt(q, fhandle, argc, argv, &req.n)) + return 1; + +- + if (!fhandle) { + fprintf(stderr, "Must specify filter \"handle\"\n"); + return -1; +@@ -471,20 +483,6 @@ static int tc_filter_get(int cmd, unsigned int flags, int argc, char 
**argv) + return -1; + } + +- if (d[0]) { +- ll_init_map(&rth); +- +- req.t.tcm_ifindex = ll_name_to_index(d); +- if (req.t.tcm_ifindex == 0) { +- fprintf(stderr, "Cannot find device \"%s\"\n", d); +- return 1; +- } +- filter_ifindex = req.t.tcm_ifindex; +- } else { +- fprintf(stderr, "Must specify netdevice \"dev\"\n"); +- return -1; +- } +- + if (rtnl_talk(&rth, &req.n, &answer) < 0) { + fprintf(stderr, "We have an error talking to the kernel\n"); + return 2; +-- +2.20.1 + diff --git a/SOURCES/0145-tc-introduce-support-for-block-handle-for-filter-ope.patch b/SOURCES/0145-tc-introduce-support-for-block-handle-for-filter-ope.patch new file mode 100644 index 0000000..1fe56ff --- /dev/null +++ b/SOURCES/0145-tc-introduce-support-for-block-handle-for-filter-ope.patch @@ -0,0 +1,269 @@ +From 13e1ae7b588c723091f81538bb5834b274f0b0c7 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Tue, 18 Jun 2019 20:02:54 +0200 +Subject: [PATCH] tc: introduce support for block-handle for filter operations + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1721291 +Upstream Status: iproute2.git commit 0c7cef9669a82 +Conflicts: context change due to missing commit 485d0c6001c4a + ("tc: Add batchsize feature for filter and actions"), + also adjust code to use fprintf instead of print_string + due to missing commit 249284ff5a44a ("tc: jsonify filter core") + +commit 0c7cef9669a82d4ad0438922f7ce57c18100d6b8 +Author: Jiri Pirko +Date: Sat Jan 20 11:00:28 2018 +0100 + + tc: introduce support for block-handle for filter operations + + So far, qdisc was the only handle that could be used to manipulate + filters. Kernel added support for using block to manipulate it. So add + the support to use block index to manipulate filters. The magic + TCM_IFINDEX_MAGIC_BLOCK indicates the block index is in use. 
+ + Signed-off-by: Jiri Pirko + Signed-off-by: David Ahern +--- + man/man8/tc.8 | 18 +++++++++ + tc/tc_filter.c | 102 +++++++++++++++++++++++++++++++++++++++++-------- + 2 files changed, 104 insertions(+), 16 deletions(-) + +diff --git a/man/man8/tc.8 b/man/man8/tc.8 +index a341a8f995f85..c493ccfa7c900 100644 +--- a/man/man8/tc.8 ++++ b/man/man8/tc.8 +@@ -41,6 +41,19 @@ tc \- show / manipulate traffic control settings + .B flowid + \fIflow-id\fR + ++.B tc ++.RI "[ " OPTIONS " ]" ++.B filter [ add | change | replace | delete | get ] block ++\fIBLOCK_INDEX\fR ++.B [ handle \fIfilter-id\fR ] ++.B protocol ++\fIprotocol\fR ++.B prio ++\fIpriority\fR filtertype ++[ filtertype specific parameters ] ++.B flowid ++\fIflow-id\fR ++ + .B tc + .RI "[ " OPTIONS " ]" + .RI "[ " FORMAT " ]" +@@ -58,6 +71,11 @@ tc \- show / manipulate traffic control settings + .RI "[ " OPTIONS " ]" + .B filter show dev + \fIDEV\fR ++.P ++.B tc ++.RI "[ " OPTIONS " ]" ++.B filter show block ++\fIBLOCK_INDEX\fR + + .P + .ti 8 +diff --git a/tc/tc_filter.c b/tc/tc_filter.c +index e479039159df6..5676ed3a74383 100644 +--- a/tc/tc_filter.c ++++ b/tc/tc_filter.c +@@ -29,14 +29,17 @@ + static void usage(void) + { + fprintf(stderr, +- "Usage: tc filter [ add | del | change | replace | show ] dev STRING\n" +- "Usage: tc filter get dev STRING parent CLASSID protocol PROTO handle FILTERID pref PRIO FILTER_TYPE\n" ++ "Usage: tc filter [ add | del | change | replace | show ] [ dev STRING ]\n" ++ " tc filter [ add | del | change | replace | show ] [ block BLOCK_INDEX ]\n" ++ " tc filter get dev STRING parent CLASSID protocol PROTO handle FILTERID pref PRIO FILTER_TYPE\n" ++ " tc filter get block BLOCK_INDEX protocol PROTO handle FILTERID pref PRIO FILTER_TYPE\n" + " [ pref PRIO ] protocol PROTO [ chain CHAIN_INDEX ]\n" + " [ estimator INTERVAL TIME_CONSTANT ]\n" + " [ root | ingress | egress | parent CLASSID ]\n" + " [ handle FILTERID ] [ [ FILTER_TYPE ] [ help | OPTIONS ] ]\n" + "\n" + " tc filter show [ dev 
STRING ] [ root | ingress | egress | parent CLASSID ]\n" ++ " tc filter show [ block BLOCK_INDEX ]\n" + "Where:\n" + "FILTER_TYPE := { rsvp | u32 | bpf | fw | route | etc. }\n" + "FILTERID := ... format depends on classifier, see there\n" +@@ -61,6 +64,7 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv) + int protocol_set = 0; + __u32 chain_index; + int chain_index_set = 0; ++ __u32 block_index = 0; + char *fhandle = NULL; + char d[16] = {}; + char k[16] = {}; +@@ -74,7 +78,21 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv) + NEXT_ARG(); + if (d[0]) + duparg("dev", *argv); ++ if (block_index) { ++ fprintf(stderr, "Error: \"dev\" and \"block\" are mutually exlusive\n"); ++ return -1; ++ } + strncpy(d, *argv, sizeof(d)-1); ++ } else if (matches(*argv, "block") == 0) { ++ NEXT_ARG(); ++ if (block_index) ++ duparg("block", *argv); ++ if (d[0]) { ++ fprintf(stderr, "Error: \"dev\" and \"block\" are mutually exlusive\n"); ++ return -1; ++ } ++ if (get_u32(&block_index, *argv, 0) || !block_index) ++ invarg("invalid block index value", *argv); + } else if (strcmp(*argv, "root") == 0) { + if (req.t.tcm_parent) { + fprintf(stderr, +@@ -169,6 +187,9 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv) + fprintf(stderr, "Cannot find device \"%s\"\n", d); + return 1; + } ++ } else if (block_index) { ++ req.t.tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK; ++ req.t.tcm_block_index = block_index; + } + + if (q) { +@@ -207,6 +228,7 @@ static __u32 filter_prio; + static __u32 filter_protocol; + static __u32 filter_chain_index; + static int filter_chain_index_set; ++static __u32 filter_block_index; + __u16 f_proto; + + int print_filter(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) +@@ -251,19 +273,25 @@ int print_filter(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) + fprintf(fp, "added "); + + fprintf(fp, "filter "); +- if (!filter_ifindex || filter_ifindex != 
t->tcm_ifindex) +- fprintf(fp, "dev %s ", ll_index_to_name(t->tcm_ifindex)); +- +- if (!filter_parent || filter_parent != t->tcm_parent) { +- if (t->tcm_parent == TC_H_ROOT) +- fprintf(fp, "root "); +- else if (t->tcm_parent == TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS)) +- fprintf(fp, "ingress "); +- else if (t->tcm_parent == TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS)) +- fprintf(fp, "egress "); +- else { +- print_tc_classid(abuf, sizeof(abuf), t->tcm_parent); +- fprintf(fp, "parent %s ", abuf); ++ if (t->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) { ++ if (!filter_block_index || ++ filter_block_index != t->tcm_block_index) ++ fprintf(fp, "block %u ", t->tcm_block_index); ++ } else { ++ if (!filter_ifindex || filter_ifindex != t->tcm_ifindex) ++ fprintf(fp, "dev %s ", ll_index_to_name(t->tcm_ifindex)); ++ ++ if (!filter_parent || filter_parent != t->tcm_parent) { ++ if (t->tcm_parent == TC_H_ROOT) ++ fprintf(fp, "root "); ++ else if (t->tcm_parent == TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS)) ++ fprintf(fp, "ingress "); ++ else if (t->tcm_parent == TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS)) ++ fprintf(fp, "egress "); ++ else { ++ print_tc_classid(abuf, sizeof(abuf), t->tcm_parent); ++ fprintf(fp, "parent %s ", abuf); ++ } + } + } + +@@ -337,6 +365,7 @@ static int tc_filter_get(int cmd, unsigned int flags, int argc, char **argv) + int protocol_set = 0; + __u32 chain_index; + int chain_index_set = 0; ++ __u32 block_index = 0; + __u32 parent_handle = 0; + char *fhandle = NULL; + char d[16] = {}; +@@ -347,7 +376,21 @@ static int tc_filter_get(int cmd, unsigned int flags, int argc, char **argv) + NEXT_ARG(); + if (d[0]) + duparg("dev", *argv); ++ if (block_index) { ++ fprintf(stderr, "Error: \"dev\" and \"block\" are mutually exlusive\n"); ++ return -1; ++ } + strncpy(d, *argv, sizeof(d)-1); ++ } else if (matches(*argv, "block") == 0) { ++ NEXT_ARG(); ++ if (block_index) ++ duparg("block", *argv); ++ if (d[0]) { ++ fprintf(stderr, "Error: \"dev\" and \"block\" are mutually 
exlusive\n"); ++ return -1; ++ } ++ if (get_u32(&block_index, *argv, 0) || !block_index) ++ invarg("invalid block index value", *argv); + } else if (strcmp(*argv, "root") == 0) { + if (req.t.tcm_parent) { + fprintf(stderr, +@@ -461,8 +504,12 @@ static int tc_filter_get(int cmd, unsigned int flags, int argc, char **argv) + return 1; + } + filter_ifindex = req.t.tcm_ifindex; ++ } else if (block_index) { ++ req.t.tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK; ++ req.t.tcm_block_index = block_index; ++ filter_block_index = block_index; + } else { +- fprintf(stderr, "Must specify netdevice \"dev\"\n"); ++ fprintf(stderr, "Must specify netdevice \"dev\" or block index \"block\"\n"); + return -1; + } + +@@ -510,6 +557,7 @@ static int tc_filter_list(int argc, char **argv) + __u32 prio = 0; + __u32 protocol = 0; + __u32 chain_index; ++ __u32 block_index = 0; + char *fhandle = NULL; + + while (argc > 0) { +@@ -517,7 +565,21 @@ static int tc_filter_list(int argc, char **argv) + NEXT_ARG(); + if (d[0]) + duparg("dev", *argv); ++ if (block_index) { ++ fprintf(stderr, "Error: \"dev\" cannot be used in the same time as \"block\"\n"); ++ return -1; ++ } + strncpy(d, *argv, sizeof(d)-1); ++ } else if (matches(*argv, "block") == 0) { ++ NEXT_ARG(); ++ if (block_index) ++ duparg("block", *argv); ++ if (d[0]) { ++ fprintf(stderr, "Error: \"block\" cannot be used in the same time as \"dev\"\n"); ++ return -1; ++ } ++ if (get_u32(&block_index, *argv, 0) || !block_index) ++ invarg("invalid block index value", *argv); + } else if (strcmp(*argv, "root") == 0) { + if (req.t.tcm_parent) { + fprintf(stderr, +@@ -606,6 +668,14 @@ static int tc_filter_list(int argc, char **argv) + return 1; + } + filter_ifindex = req.t.tcm_ifindex; ++ } else if (block_index) { ++ if (!tc_qdisc_block_exists(block_index)) { ++ fprintf(stderr, "Cannot find block \"%u\"\n", block_index); ++ return 1; ++ } ++ req.t.tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK; ++ req.t.tcm_block_index = block_index; ++ filter_block_index = 
block_index; + } + + if (filter_chain_index_set) +-- +2.20.1 + diff --git a/SOURCES/0146-tc-implement-ingress-egress-block-index-attributes-f.patch b/SOURCES/0146-tc-implement-ingress-egress-block-index-attributes-f.patch new file mode 100644 index 0000000..e0aa77c --- /dev/null +++ b/SOURCES/0146-tc-implement-ingress-egress-block-index-attributes-f.patch @@ -0,0 +1,121 @@ +From f38f33f8693ed7a4f883b18862e47f822ff8a62d Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Tue, 18 Jun 2019 20:04:42 +0200 +Subject: [PATCH] tc: implement ingress/egress block index attributes for + qdiscs + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1721291 +Upstream Status: iproute2.git commit 063463efd7f0d +Conflicts: adjust the code to make it compile due to missing + commit c91d262f414d2 ("tc: jsonify qdisc core") + +commit 063463efd7f0d91b7372b089a7b7aff7fc9ac0f6 +Author: Jiri Pirko +Date: Sat Jan 20 11:00:29 2018 +0100 + + tc: implement ingress/egress block index attributes for qdiscs + + During qdisc creation it is possible to specify shared block for bot + ingress and egress. Pass this values to kernel according to the command + line options. 
+ + Signed-off-by: Jiri Pirko + Signed-off-by: David Ahern +--- + man/man8/tc.8 | 6 +++++- + tc/tc_qdisc.c | 34 ++++++++++++++++++++++++++++++++++ + 2 files changed, 39 insertions(+), 1 deletion(-) + +diff --git a/man/man8/tc.8 b/man/man8/tc.8 +index c493ccfa7c900..c89a7a8ecf83b 100644 +--- a/man/man8/tc.8 ++++ b/man/man8/tc.8 +@@ -11,7 +11,11 @@ tc \- show / manipulate traffic control settings + \fIqdisc-id\fR + .B | root ] + .B [ handle +-\fIqdisc-id\fR ] qdisc ++\fIqdisc-id\fR ] ++.B [ ingress_block ++\fIBLOCK_INDEX\fR ] ++.B [ egress_block ++\fIBLOCK_INDEX\fR ] qdisc + [ qdisc specific parameters ] + .P + +diff --git a/tc/tc_qdisc.c b/tc/tc_qdisc.c +index f8e06ccf205a0..26d23f43007ae 100644 +--- a/tc/tc_qdisc.c ++++ b/tc/tc_qdisc.c +@@ -32,6 +32,7 @@ static int usage(void) + fprintf(stderr, " [ handle QHANDLE ] [ root | ingress | clsact | parent CLASSID ]\n"); + fprintf(stderr, " [ estimator INTERVAL TIME_CONSTANT ]\n"); + fprintf(stderr, " [ stab [ help | STAB_OPTIONS] ]\n"); ++ fprintf(stderr, " [ ingress_block BLOCK_INDEX ] [ egress_block BLOCK_INDEX ]\n"); + fprintf(stderr, " [ [ QDISC_KIND ] [ help | OPTIONS ] ]\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " tc qdisc show [ dev STRING ] [ ingress | clsact ]\n"); +@@ -62,6 +63,8 @@ static int tc_qdisc_modify(int cmd, unsigned int flags, int argc, char **argv) + .n.nlmsg_type = cmd, + .t.tcm_family = AF_UNSPEC, + }; ++ __u32 ingress_block = 0; ++ __u32 egress_block = 0; + + while (argc > 0) { + if (strcmp(*argv, "dev") == 0) { +@@ -122,6 +125,14 @@ static int tc_qdisc_modify(int cmd, unsigned int flags, int argc, char **argv) + if (parse_size_table(&argc, &argv, &stab.szopts) < 0) + return -1; + continue; ++ } else if (matches(*argv, "ingress_block") == 0) { ++ NEXT_ARG(); ++ if (get_u32(&ingress_block, *argv, 0) || !ingress_block) ++ invarg("invalid ingress block index value", *argv); ++ } else if (matches(*argv, "egress_block") == 0) { ++ NEXT_ARG(); ++ if (get_u32(&egress_block, *argv, 0) || 
!egress_block) ++ invarg("invalid egress block index value", *argv); + } else if (matches(*argv, "help") == 0) { + usage(); + } else { +@@ -139,6 +150,13 @@ static int tc_qdisc_modify(int cmd, unsigned int flags, int argc, char **argv) + if (est.ewma_log) + addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est)); + ++ if (ingress_block) ++ addattr32(&req.n, sizeof(req), ++ TCA_INGRESS_BLOCK, ingress_block); ++ if (egress_block) ++ addattr32(&req.n, sizeof(req), ++ TCA_EGRESS_BLOCK, egress_block); ++ + if (q) { + if (q->parse_qopt) { + if (q->parse_qopt(q, argc, argv, &req.n)) +@@ -252,6 +270,22 @@ int print_qdisc(const struct sockaddr_nl *who, + if (t->tcm_info != 1) + fprintf(fp, "refcnt %d ", t->tcm_info); + ++ if (tb[TCA_INGRESS_BLOCK] && ++ RTA_PAYLOAD(tb[TCA_INGRESS_BLOCK]) >= sizeof(__u32)) { ++ __u32 block = rta_getattr_u32(tb[TCA_INGRESS_BLOCK]); ++ ++ if (block) ++ fprintf(fp, "ingress_block %u ", block); ++ } ++ ++ if (tb[TCA_EGRESS_BLOCK] && ++ RTA_PAYLOAD(tb[TCA_EGRESS_BLOCK]) >= sizeof(__u32)) { ++ __u32 block = rta_getattr_u32(tb[TCA_EGRESS_BLOCK]); ++ ++ if (block) ++ fprintf(fp, "egress_block %u ", block); ++ } ++ + /* pfifo_fast is generic enough to warrant the hardcoding --JHS */ + if (strcmp("pfifo_fast", RTA_DATA(tb[TCA_KIND])) == 0) + q = get_qdisc_kind("prio"); +-- +2.20.1 + diff --git a/SPECS/iproute.spec b/SPECS/iproute.spec index 0f0a78e..9c0adae 100644 --- a/SPECS/iproute.spec +++ b/SPECS/iproute.spec @@ -2,7 +2,7 @@ %define rpmversion 4.11.0 %define baserelease 0.el7 -%define specrelease 14%{?dist}.2 +%define specrelease 25%{?dist} %define pkg_release %{specrelease}%{?buildid} Summary: Advanced IP routing and network device configuration tools @@ -58,11 +58,109 @@ Patch39: 0040-link_gre6-Detect-invalid-encaplimit-values.patch Patch40: 0041-man-tc-csum.8-Fix-inconsistency-in-example-descripti.patch Patch41: 0042-tc-fix-command-tc-actions-del-hang-issue.patch Patch42: 0043-ip-link-Fix-use-after-free-in-nl_get_ll_addr_len.patch -Patch43: 
0050-iproute-Abort-if-nexthop-cannot-be-parsed.patch -Patch44: 0051-ip-route-Fix-segfault-with-many-nexthops.patch -Patch45: 0052-man-ip-route.8-Document-nexthop-limit.patch -Patch46: 0053-ip-route-Fix-nexthop-encap-parsing.patch -Patch47: 0054-ip-link-Fix-listing-of-alias-interfaces.patch +Patch43: 0044-tc-m_tunnel_key-reformat-the-usage-text.patch +Patch44: 0045-tc-m_tunnel_key-Allow-key-less-tunnels.patch +Patch45: 0046-tc-include-stdint.h-explicitly-for-UINT16_MAX.patch +Patch46: 0047-Update-kernel-headers.patch +Patch47: 0048-tc-flower-Add-match-on-encapsulating-tos-ttl.patch +Patch48: 0049-tc-act_tunnel_key-Enable-setup-of-tos-and-ttl.patch +Patch49: 0050-iproute-Abort-if-nexthop-cannot-be-parsed.patch +Patch50: 0051-ip-route-Fix-segfault-with-many-nexthops.patch +Patch51: 0052-man-ip-route.8-Document-nexthop-limit.patch +Patch52: 0053-ip-route-Fix-nexthop-encap-parsing.patch +Patch53: 0054-ip-link-Fix-listing-of-alias-interfaces.patch +Patch54: 0055-ip-Add-violation-counters-to-VF-statisctics.patch +Patch55: 0056-devlink-Add-support-for-devlink-resource-abstraction.patch +Patch56: 0057-devlink-Add-support-for-hot-reload.patch +Patch57: 0058-devlink-Update-man-pages-and-add-resource-man.patch +Patch58: 0059-devlink-Add-param-command-support.patch +Patch59: 0060-man-ip-route.8-ssthresh-parameter-is-NUMBER.patch +Patch60: 0061-man-tc-vlan.8-Fix-for-incorrect-example.patch +Patch61: 0062-tc-flower-Add-support-for-QinQ.patch +Patch62: 0063-utils-Move-BIT-macro-to-common-header.patch +Patch63: 0064-lib-make-resolve_hosts-variable-common.patch +Patch64: 0065-json_writer-add-new-json-handlers-null-float-with-fo.patch +Patch65: 0066-rdma-Add-MR-resource-tracking-information.patch +Patch66: 0067-rdma-add-infrastructure-for-RDMA-tool.patch +Patch67: 0068-rdma-add-man-pages-for-RDMA-tool.patch +Patch68: 0069-tc-f_flower-Add-support-for-matching-first-frag-pack.patch +Patch69: 0070-ss-enclose-IPv6-address-in-brackets.patch +Patch70: 
0071-ip-address-Use-correct-max-attribute-value-in-print_.patch +Patch71: 0072-examples-Some-shell-fixes-to-cbq.init.patch +Patch72: 0073-ifcfg-Quote-left-hand-side-of-expression.patch +Patch73: 0074-tipc-node-Fix-socket-fd-check-in-cmd_node_get_addr.patch +Patch74: 0075-iproute_lwtunnel-Argument-to-strerror-must-be-positi.patch +Patch75: 0076-iproute_lwtunnel-csum_mode-value-checking-was-ineffe.patch +Patch76: 0077-ss-Don-t-leak-fd-in-tcp_show_netlink_file.patch +Patch77: 0078-tc-em_ipset-Don-t-leak-sockfd-on-error-path.patch +Patch78: 0079-ipvrf-Fix-error-path-of-vrf_switch.patch +Patch79: 0080-ifstat-Fix-memleak-in-error-case.patch +Patch80: 0081-ifstat-Fix-memleak-in-dump_kern_db-for-json-output.patch +Patch81: 0082-ss-Fix-potential-memleak-in-unix_stats_print.patch +Patch82: 0083-tipc-bearer-Fix-resource-leak-in-error-path.patch +Patch83: 0084-devlink-No-need-for-this-self-assignment.patch +Patch84: 0085-ipntable-No-need-to-check-and-assign-to-parms_rta.patch +Patch85: 0086-iproute-Fix-for-missing-Oifs-display.patch +Patch86: 0087-lib-rt_names-Drop-dead-code-in-rtnl_rttable_n2a.patch +Patch87: 0088-ss-Skip-useless-check-in-parse_hostcond.patch +Patch88: 0089-ss-Drop-useless-assignment.patch +Patch89: 0090-tc-m_gact-Drop-dead-code.patch +Patch90: 0091-ipaddress-Avoid-accessing-uninitialized-variable-lcl.patch +Patch91: 0092-iplink_can-Prevent-overstepping-array-bounds.patch +Patch92: 0093-ipmaddr-Avoid-accessing-uninitialized-data.patch +Patch93: 0094-ss-Use-C99-initializer-in-netlink_show_one.patch +Patch94: 0095-netem-maketable-Check-return-value-of-fstat.patch +Patch95: 0096-tc-q_multiq-Don-t-pass-garbage-in-TCA_OPTIONS.patch +Patch96: 0097-iproute-Check-mark-value-input.patch +Patch97: 0098-iplink_vrf-Complain-if-main-table-is-not-found.patch +Patch98: 0099-devlink-Check-return-code-of-strslashrsplit.patch +Patch99: 0100-lib-bpf-Don-t-leak-fp-in-bpf_find_mntpt.patch +Patch100: 0101-ifstat-nstat-Check-fdopen-return-value.patch +Patch101: 
0102-tc-q_netem-Don-t-dereference-possibly-NULL-pointer.patch +Patch102: 0103-tc-tc_filter-Make-sure-filter-name-is-not-empty.patch +Patch103: 0104-tipc-bearer-Prevent-NULL-pointer-dereference.patch +Patch104: 0105-ipntable-Avoid-memory-allocation-for-filter.name.patch +Patch105: 0106-lib-fs-Fix-format-string-in-find_fs_mount.patch +Patch106: 0107-lib-inet_proto-Review-inet_proto_-a2n-n2a.patch +Patch107: 0108-lnstat_util-Simplify-alloc_and_open-a-bit.patch +Patch108: 0109-tc-m_xt-Fix-for-potential-string-buffer-overflows.patch +Patch109: 0110-lib-ll_map-Choose-size-of-new-cache-items-at-run-tim.patch +Patch110: 0111-ss-Make-struct-tcpstat-fields-timer-and-timeout-unsi.patch +Patch111: 0112-ss-Make-sure-scanned-index-value-to-unix_state_map-i.patch +Patch112: 0113-netem-maketable-Check-return-value-of-fscanf.patch +Patch113: 0114-lib-bpf-Check-return-value-of-write.patch +Patch114: 0115-lib-fs-Fix-and-simplify-make_path.patch +Patch115: 0116-lib-libnetlink-Don-t-pass-NULL-parameter-to-memcpy.patch +Patch116: 0117-utils-Implement-strlcpy-and-strlcat.patch +Patch117: 0118-Convert-the-obvious-cases-to-strlcpy.patch +Patch118: 0119-Convert-harmful-calls-to-strncpy-to-strlcpy.patch +Patch119: 0120-ipxfrm-Replace-STRBUF_CAT-macro-with-strlcat.patch +Patch120: 0121-tc_util-No-need-to-terminate-an-snprintf-ed-buffer.patch +Patch121: 0122-lnstat_util-Make-sure-buffer-is-NUL-terminated.patch +Patch122: 0123-utils-strlcpy-and-strlcat-don-t-clobber-dst.patch +Patch123: 0124-ip-6-tunnel-Avoid-copying-user-supplied-interface-na.patch +Patch124: 0125-tc-flower-No-need-to-cache-indev-arg.patch +Patch125: 0126-Check-user-supplied-interface-name-lengths.patch +Patch126: 0127-bpf-minor-cleanups-for-bpf_trace_pipe.patch +Patch127: 0128-ip-tunnel-Use-tnl_parse_key-to-parse-tunnel-key.patch +Patch128: 0129-man-ip-link-document-GRE-tunnels.patch +Patch129: 0130-gre-gre6-allow-clearing-i-o-key-seq-csum-flags.patch +Patch130: 0131-tc_filter-add-support-for-chain-index.patch +Patch131: 
0132-tc-actions-add-helpers-to-parse-and-print-control-ac.patch +Patch132: 0133-tc-actions-introduce-support-for-goto-chain-action.patch +Patch133: 0134-tc-gact-fix-control-action-parsing.patch +Patch134: 0135-tc-don-t-print-error-message-on-miss-when-parsing-ac.patch +Patch135: 0136-tc-util-Don-t-call-NEXT_ARG_FWD-in-__parse_action_co.patch +Patch136: 0137-tc-fix-parsing-of-the-control-action.patch +Patch137: 0138-m_mirred-don-t-bail-if-the-control-action-is-missing.patch +Patch138: 0139-tc-m_tunnel_key-add-csum-nocsum-option.patch +Patch139: 0140-gre6-add-collect-metadata-support.patch +Patch140: 0141-tc_util-Silence-spurious-compiler-warning.patch +Patch141: 0142-ss-use-for-any-address-any-family-sockets.patch +Patch142: 0143-tc-introduce-tc_qdisc_block_exists-helper.patch +Patch143: 0144-tc_filter-resolve-device-name-before-parsing-filter.patch +Patch144: 0145-tc-introduce-support-for-block-handle-for-filter-ope.patch +Patch145: 0146-tc-implement-ingress-egress-block-index-attributes-f.patch License: GPLv2+ and Public Domain BuildRequires: bison BuildRequires: flex @@ -175,14 +273,130 @@ cat %{SOURCE3} >>%{buildroot}%{_sysconfdir}/iproute2/rt_dsfield %{_includedir}/iproute2/bpf_elf.h %changelog -* Tue Mar 12 2019 Phil Sutter [4.11.0-14.el7_6.2] -- ip-link: Fix listing of alias interfaces (Phil Sutter) [1687717] - -* Fri Mar 01 2019 Phil Sutter [4.11.0-14.el7_6.1] -- ip-route: Fix nexthop encap parsing (Phil Sutter) [1679996] -- man: ip-route.8: Document nexthop limit (Phil Sutter) [1679996] -- ip-route: Fix segfault with many nexthops (Phil Sutter) [1679996] -- iproute: Abort if nexthop cannot be parsed (Phil Sutter) [1679996] +* Fri Jun 21 2019 Andrea Claudi [4.11.0-25.el7] +- tc: implement ingress/egress block index attributes for qdiscs (Andrea Claudi) [1721291] +- tc: introduce support for block-handle for filter operations (Andrea Claudi) [1721291] +- tc_filter: resolve device name before parsing filter (Andrea Claudi) [1721291] +- tc: introduce 
tc_qdisc_block_exists helper (Andrea Claudi) [1721291] + +* Wed Jun 12 2019 Andrea Claudi [4.11.0-24.el7] +- ss: use [::] for any address/any family sockets (Andrea Claudi) [1588122] + +* Wed Jun 05 2019 Andrea Claudi [4.11.0-23.el7] +- tc_util: Silence spurious compiler warning (Andrea Claudi) [1714660] +- gre6: add collect metadata support (Andrea Claudi) [1714660] +- tc: m_tunnel_key: add csum/nocsum option (Andrea Claudi) [1714660] +- m_mirred: don't bail if the control action is missing (Andrea Claudi) [1714660] +- tc: fix parsing of the control action (Andrea Claudi) [1714660] +- tc: util: Don't call NEXT_ARG_FWD() in __parse_action_control() (Andrea Claudi) [1714660] +- tc: don't print error message on miss when parsing action with default (Andrea Claudi) [1714660] +- tc: gact: fix control action parsing (Andrea Claudi) [1714660] +- tc/actions: introduce support for goto chain action (Andrea Claudi) [1714660] +- tc: actions: add helpers to parse and print control actions (Andrea Claudi) [1714660] +- tc_filter: add support for chain index (Andrea Claudi) [1714660] +- gre/gre6: allow clearing {,i,o}{key,seq,csum} flags (Andrea Claudi) [1714660] +- man: ip link: document GRE tunnels (Andrea Claudi) [1714660] +- ip/tunnel: Use tnl_parse_key() to parse tunnel key (Andrea Claudi) [1714660] + +* Tue Apr 30 2019 Andrea Claudi [4.11.0-22.el7] +- bpf: minor cleanups for bpf_trace_pipe (Andrea Claudi) [1465646] + +* Mon Apr 29 2019 Andrea Claudi [4.11.0-21.el7] +- Check user supplied interface name lengths (Andrea Claudi) [1465646] +- tc: flower: No need to cache indev arg (Andrea Claudi) [1465646] +- ip{6, }tunnel: Avoid copying user-supplied interface name around (Andrea Claudi) [1465646] +- utils: strlcpy() and strlcat() don't clobber dst (Andrea Claudi) [1465646] +- lnstat_util: Make sure buffer is NUL-terminated (Andrea Claudi) [1465646] +- tc_util: No need to terminate an snprintf'ed buffer (Andrea Claudi) [1465646] +- ipxfrm: Replace STRBUF_CAT macro with 
strlcat() (Andrea Claudi) [1465646] +- Convert harmful calls to strncpy() to strlcpy() (Andrea Claudi) [1465646] +- Convert the obvious cases to strlcpy() (Andrea Claudi) [1465646] +- utils: Implement strlcpy() and strlcat() (Andrea Claudi) [1465646] +- lib/libnetlink: Don't pass NULL parameter to memcpy() (Andrea Claudi) [1465646] +- lib/fs: Fix and simplify make_path() (Andrea Claudi) [1465646] +- lib/bpf: Check return value of write() (Andrea Claudi) [1465646] +- netem/maketable: Check return value of fscanf() (Andrea Claudi) [1465646] +- ss: Make sure scanned index value to unix_state_map is sane (Andrea Claudi) [1465646] +- ss: Make struct tcpstat fields 'timer' and 'timeout' unsigned (Andrea Claudi) [1465646] +- lib/ll_map: Choose size of new cache items at run-time (Andrea Claudi) [1465646] +- tc/m_xt: Fix for potential string buffer overflows (Andrea Claudi) [1465646] +- lnstat_util: Simplify alloc_and_open() a bit (Andrea Claudi) [1465646] +- lib/inet_proto: Review inet_proto_{a2n,n2a}() (Andrea Claudi) [1465646] +- lib/fs: Fix format string in find_fs_mount() (Andrea Claudi) [1465646] +- ipntable: Avoid memory allocation for filter.name (Andrea Claudi) [1465646] +- tipc/bearer: Prevent NULL pointer dereference (Andrea Claudi) [1465646] +- tc/tc_filter: Make sure filter name is not empty (Andrea Claudi) [1465646] +- tc/q_netem: Don't dereference possibly NULL pointer (Andrea Claudi) [1465646] +- ifstat, nstat: Check fdopen() return value (Andrea Claudi) [1465646] +- lib/bpf: Don't leak fp in bpf_find_mntpt() (Andrea Claudi) [1465646] +- devlink: Check return code of strslashrsplit() (Andrea Claudi) [1465646] +- iplink_vrf: Complain if main table is not found (Andrea Claudi) [1465646] +- iproute: Check mark value input (Andrea Claudi) [1465646] +- tc/q_multiq: Don't pass garbage in TCA_OPTIONS (Andrea Claudi) [1465646] +- netem/maketable: Check return value of fstat() (Andrea Claudi) [1465646] +- ss: Use C99 initializer in netlink_show_one() (Andrea Claudi) 
[1465646] +- ipmaddr: Avoid accessing uninitialized data (Andrea Claudi) [1465646] +- iplink_can: Prevent overstepping array bounds (Andrea Claudi) [1465646] +- ipaddress: Avoid accessing uninitialized variable lcl (Andrea Claudi) [1465646] +- tc/m_gact: Drop dead code (Andrea Claudi) [1465646] +- ss: Drop useless assignment (Andrea Claudi) [1465646] +- ss: Skip useless check in parse_hostcond() (Andrea Claudi) [1465646] +- lib/rt_names: Drop dead code in rtnl_rttable_n2a() (Andrea Claudi) [1465646] +- iproute: Fix for missing 'Oifs:' display (Andrea Claudi) [1465646] +- ipntable: No need to check and assign to parms_rta (Andrea Claudi) [1465646] +- devlink: No need for this self-assignment (Andrea Claudi) [1465646] +- tipc/bearer: Fix resource leak in error path (Andrea Claudi) [1465646] +- ss: Fix potential memleak in unix_stats_print() (Andrea Claudi) [1465646] +- ifstat: Fix memleak in dump_kern_db() for json output (Andrea Claudi) [1465646] +- ifstat: Fix memleak in error case (Andrea Claudi) [1465646] +- ipvrf: Fix error path of vrf_switch() (Andrea Claudi) [1465646] +- tc/em_ipset: Don't leak sockfd on error path (Andrea Claudi) [1465646] +- ss: Don't leak fd in tcp_show_netlink_file() (Andrea Claudi) [1465646] +- iproute_lwtunnel: csum_mode value checking was ineffective (Andrea Claudi) [1465646] +- iproute_lwtunnel: Argument to strerror must be positive (Andrea Claudi) [1465646] +- tipc/node: Fix socket fd check in cmd_node_get_addr() (Andrea Claudi) [1465646] +- ifcfg: Quote left-hand side of [ ] expression (Andrea Claudi) [1465646] +- examples: Some shell fixes to cbq.init (Andrea Claudi) [1465646] +- ip-address: Use correct max attribute value in print_vf_stats64() (Andrea Claudi) [1679749] +- ss: enclose IPv6 address in brackets (Andrea Claudi) [1588122] +- tc: f_flower: Add support for matching first frag packets (Andrea Claudi) [1559814] + +* Thu Mar 28 2019 Andrea Claudi [4.11.0-20.el7] +- rdma: add man pages for RDMA tool (Andrea Claudi) [1642479] 
+ +* Wed Mar 27 2019 Andrea Claudi [4.11.0-19.el7] +- rdma: add infrastructure for RDMA tool (Andrea Claudi) [1642479 1641914] +- rdma: Add MR resource tracking information (Andrea Claudi) [1642479 1641914] +- json_writer: add new json handlers (null, float with format, lluint, hu) (Andrea Claudi) [1642479 1641914] +- lib: make resolve_hosts variable common (Andrea Claudi) [1642479 1641914] +- utils: Move BIT macro to common header (Andrea Claudi) [1642479 1641914] +- tc: flower: Add support for QinQ (Andrea Claudi) [1642347] +- man: tc-vlan.8: Fix for incorrect example (Andrea Claudi) [1593630] +- man: ip-route.8: ssthresh parameter is NUMBER (Andrea Claudi) [1593628] +- devlink: Add param command support (Andrea Claudi) [1644731] +- devlink: Update man pages and add resource man (Andrea Claudi) [1644731] +- devlink: Add support for hot reload (Andrea Claudi) [1644731] +- devlink: Add support for devlink resource abstraction (Andrea Claudi) [1644731] + +* Mon Mar 18 2019 Andrea Claudi [4.11.0-18.el7] +- ip: Add violation counters to VF statistics (Andrea Claudi) [1471680] + +* Mon Mar 11 2019 Phil Sutter [4.11.0-17.el7] +- ip-link: Fix listing of alias interfaces (Phil Sutter) [1673226] + +* Thu Feb 21 2019 Phil Sutter [4.11.0-16.el7] +- ip-route: Fix nexthop encap parsing (Phil Sutter) [1624656] +- man: ip-route.8: Document nexthop limit (Phil Sutter) [1624656] +- ip-route: Fix segfault with many nexthops (Phil Sutter) [1624656] +- iproute: Abort if nexthop cannot be parsed (Phil Sutter) [1624656] + +* Wed Feb 06 2019 Phil Sutter [4.11.0-15.el7] +- tc/act_tunnel_key: Enable setup of tos and ttl (Phil Sutter) [1641909] +- tc/flower: Add match on encapsulating tos/ttl (Phil Sutter) [1641909] +- Update kernel headers (Phil Sutter) +- tc: include stdint.h explicitly for UINT16_MAX (Phil Sutter) [1641909] +- tc: m_tunnel_key: Allow key-less tunnels (Phil Sutter) [1658506] +- tc: m_tunnel_key: reformat the usage text (Phil Sutter) [1658506] * Tue Mar 06 2018 Phil
Sutter [4.11.0-14.el7] - ip-link: Fix use after free in nl_get_ll_addr_len() (Phil Sutter) [1550097]