From 8def76bf770ade4ca2b94b0d3f702f120c1553ab Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Nov 05 2019 21:28:06 +0000 Subject: import iproute-4.18.0-15.el8 --- diff --git a/SOURCES/0045-ip-rule-Add-ipproto-and-port-range-to-filter-list.patch b/SOURCES/0045-ip-rule-Add-ipproto-and-port-range-to-filter-list.patch new file mode 100644 index 0000000..1d9ee49 --- /dev/null +++ b/SOURCES/0045-ip-rule-Add-ipproto-and-port-range-to-filter-list.patch @@ -0,0 +1,121 @@ +From ec8d7120bf3b8fd47937e9297468e0bb7c1f270c Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 29 May 2019 17:40:35 +0200 +Subject: [PATCH] ip rule: Add ipproto and port range to filter list + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1678111 +Upstream Status: iproute2.git commit b2e8bf1584605 + +commit b2e8bf158460568ec5b48cba69f657f95891c901 +Author: David Ahern +Date: Tue Oct 30 15:03:30 2018 -0700 + + ip rule: Add ipproto and port range to filter list + + Allow ip rule dumps and flushes to filter based on ipproto, sport + and dport. 
Example: + + $ ip ru ls ipproto udp + 99: from all to 8.8.8.8 ipproto udp dport 53 lookup 1001 + $ ip ru ls dport 53 + 99: from all to 8.8.8.8 ipproto udp dport 53 lookup 1001 + + Signed-off-by: David Ahern +--- + ip/iprule.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 66 insertions(+) + +diff --git a/ip/iprule.c b/ip/iprule.c +index 744d6d88e3433..33160eafa2b33 100644 +--- a/ip/iprule.c ++++ b/ip/iprule.c +@@ -78,6 +78,9 @@ static struct + inet_prefix dst; + int protocol; + int protocolmask; ++ struct fib_rule_port_range sport; ++ struct fib_rule_port_range dport; ++ __u8 ipproto; + } filter; + + static inline int frh_get_table(struct fib_rule_hdr *frh, struct rtattr **tb) +@@ -174,6 +177,39 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) + return false; + } + ++ if (filter.ipproto) { ++ __u8 ipproto = 0; ++ ++ if (tb[FRA_IP_PROTO]) ++ ipproto = rta_getattr_u8(tb[FRA_IP_PROTO]); ++ if (filter.ipproto != ipproto) ++ return false; ++ } ++ ++ if (filter.sport.start) { ++ const struct fib_rule_port_range *r; ++ ++ if (!tb[FRA_SPORT_RANGE]) ++ return false; ++ ++ r = RTA_DATA(tb[FRA_SPORT_RANGE]); ++ if (r->start != filter.sport.start || ++ r->end != filter.sport.end) ++ return false; ++ } ++ ++ if (filter.dport.start) { ++ const struct fib_rule_port_range *r; ++ ++ if (!tb[FRA_DPORT_RANGE]) ++ return false; ++ ++ r = RTA_DATA(tb[FRA_DPORT_RANGE]); ++ if (r->start != filter.dport.start || ++ r->end != filter.dport.end) ++ return false; ++ } ++ + table = frh_get_table(frh, tb); + if (filter.tb > 0 && filter.tb ^ table) + return false; +@@ -604,6 +640,36 @@ static int iprule_list_flush_or_save(int argc, char **argv, int action) + filter.protocolmask = 0; + } + filter.protocol = prot; ++ } else if (strcmp(*argv, "ipproto") == 0) { ++ int ipproto; ++ ++ NEXT_ARG(); ++ ipproto = inet_proto_a2n(*argv); ++ if (ipproto < 0) ++ invarg("Invalid \"ipproto\" value\n", *argv); ++ filter.ipproto = ipproto; ++ } else 
if (strcmp(*argv, "sport") == 0) { ++ struct fib_rule_port_range r; ++ int ret; ++ ++ NEXT_ARG(); ++ ret = sscanf(*argv, "%hu-%hu", &r.start, &r.end); ++ if (ret == 1) ++ r.end = r.start; ++ else if (ret != 2) ++ invarg("invalid port range\n", *argv); ++ filter.sport = r; ++ } else if (strcmp(*argv, "dport") == 0) { ++ struct fib_rule_port_range r; ++ int ret; ++ ++ NEXT_ARG(); ++ ret = sscanf(*argv, "%hu-%hu", &r.start, &r.end); ++ if (ret == 1) ++ r.end = r.start; ++ else if (ret != 2) ++ invarg("invalid dport range\n", *argv); ++ filter.dport = r; + } else{ + if (matches(*argv, "dst") == 0 || + matches(*argv, "to") == 0) { +-- +2.20.1 + diff --git a/SOURCES/0046-tc-flower-Add-support-for-QinQ.patch b/SOURCES/0046-tc-flower-Add-support-for-QinQ.patch new file mode 100644 index 0000000..2657235 --- /dev/null +++ b/SOURCES/0046-tc-flower-Add-support-for-QinQ.patch @@ -0,0 +1,273 @@ +From b485126fd0a84a09f3d61bb4d634011be92fb6a4 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 29 May 2019 18:28:17 +0200 +Subject: [PATCH] tc: flower: Add support for QinQ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1615928 +Upstream Status: iproute2.git commit 1f0a5dfd388cd + +commit 1f0a5dfd388cd5c25f6a24247667e04b2346e568 +Author: Jianbo Liu +Date: Sat Jun 30 10:01:33 2018 +0000 + + tc: flower: Add support for QinQ + + To support matching on both outer and inner vlan headers, + we add new cvlan_id/cvlan_prio/cvlan_ethtype for inner vlan header. 
+ + Example: + # tc filter add dev eth0 protocol 802.1ad parent ffff: \ + flower vlan_id 1000 vlan_ethtype 802.1q \ + cvlan_id 100 cvlan_ethtype ipv4 \ + action vlan pop \ + action vlan pop \ + action mirred egress redirect dev eth1 + + # tc filter show dev eth0 ingress + filter protocol 802.1ad pref 1 flower chain 0 + filter protocol 802.1ad pref 1 flower chain 0 handle 0x1 +   vlan_id 1000 +   vlan_ethtype 802.1Q +   cvlan_id 100 +   cvlan_ethtype ip +   eth_type ipv4 +   in_hw + + Signed-off-by: Jianbo Liu + Acked-by: Jiri Pirko + Signed-off-by: David Ahern +--- + man/man8/tc-flower.8 | 23 ++++++++++ + tc/f_flower.c | 103 ++++++++++++++++++++++++++++++++++++++----- + 2 files changed, 114 insertions(+), 12 deletions(-) + +diff --git a/man/man8/tc-flower.8 b/man/man8/tc-flower.8 +index 276b5271cf013..8be8882592eaa 100644 +--- a/man/man8/tc-flower.8 ++++ b/man/man8/tc-flower.8 +@@ -34,6 +34,12 @@ flower \- flow based traffic control filter + .IR PRIORITY " | " + .BR vlan_ethtype " { " ipv4 " | " ipv6 " | " + .IR ETH_TYPE " } | " ++.B cvlan_id ++.IR VID " | " ++.B cvlan_prio ++.IR PRIORITY " | " ++.BR cvlan_ethtype " { " ipv4 " | " ipv6 " | " ++.IR ETH_TYPE " } | " + .B mpls_label + .IR LABEL " | " + .B mpls_tc +@@ -145,6 +151,23 @@ Match on layer three protocol. + .I VLAN_ETH_TYPE + may be either + .BR ipv4 ", " ipv6 ++or an unsigned 16bit value in hexadecimal format. To match on QinQ packet, it must be 802.1Q or 802.1AD. ++.TP ++.BI cvlan_id " VID" ++Match on QinQ inner vlan tag id. ++.I VID ++is an unsigned 12bit value in decimal format. ++.TP ++.BI cvlan_prio " PRIORITY" ++Match on QinQ inner vlan tag priority. ++.I PRIORITY ++is an unsigned 3bit value in decimal format. ++.TP ++.BI cvlan_ethtype " VLAN_ETH_TYPE" ++Match on QinQ layer three protocol. ++.I VLAN_ETH_TYPE ++may be either ++.BR ipv4 ", " ipv6 + or an unsigned 16bit value in hexadecimal format. 
+ .TP + .BI mpls_label " LABEL" +diff --git a/tc/f_flower.c b/tc/f_flower.c +index 43102c86d1597..634bb81af7dbb 100644 +--- a/tc/f_flower.c ++++ b/tc/f_flower.c +@@ -50,6 +50,9 @@ static void explain(void) + " vlan_id VID |\n" + " vlan_prio PRIORITY |\n" + " vlan_ethtype [ ipv4 | ipv6 | ETH-TYPE ] |\n" ++ " cvlan_id VID |\n" ++ " cvlan_prio PRIORITY |\n" ++ " cvlan_ethtype [ ipv4 | ipv6 | ETH-TYPE ] |\n" + " dst_mac MASKED-LLADDR |\n" + " src_mac MASKED-LLADDR |\n" + " ip_proto [tcp | udp | sctp | icmp | icmpv6 | IP-PROTO ] |\n" +@@ -131,15 +134,21 @@ err: + return err; + } + ++static bool eth_type_vlan(__be16 ethertype) ++{ ++ return ethertype == htons(ETH_P_8021Q) || ++ ethertype == htons(ETH_P_8021AD); ++} ++ + static int flower_parse_vlan_eth_type(char *str, __be16 eth_type, int type, + __be16 *p_vlan_eth_type, + struct nlmsghdr *n) + { + __be16 vlan_eth_type; + +- if (eth_type != htons(ETH_P_8021Q)) { +- fprintf(stderr, +- "Can't set \"vlan_ethtype\" if ethertype isn't 802.1Q\n"); ++ if (!eth_type_vlan(eth_type)) { ++ fprintf(stderr, "Can't set \"%s\" if ethertype isn't 802.1Q or 802.1AD\n", ++ type == TCA_FLOWER_KEY_VLAN_ETH_TYPE ? 
"vlan_ethtype" : "cvlan_ethtype"); + return -1; + } + +@@ -762,6 +771,7 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, + struct rtattr *tail; + __be16 eth_type = TC_H_MIN(t->tcm_info); + __be16 vlan_ethtype = 0; ++ __be16 cvlan_ethtype = 0; + __u8 ip_proto = 0xff; + __u32 flags = 0; + __u32 mtf = 0; +@@ -839,9 +849,8 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, + __u16 vid; + + NEXT_ARG(); +- if (eth_type != htons(ETH_P_8021Q)) { +- fprintf(stderr, +- "Can't set \"vlan_id\" if ethertype isn't 802.1Q\n"); ++ if (!eth_type_vlan(eth_type)) { ++ fprintf(stderr, "Can't set \"vlan_id\" if ethertype isn't 802.1Q or 802.1AD\n"); + return -1; + } + ret = get_u16(&vid, *argv, 10); +@@ -854,9 +863,8 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, + __u8 vlan_prio; + + NEXT_ARG(); +- if (eth_type != htons(ETH_P_8021Q)) { +- fprintf(stderr, +- "Can't set \"vlan_prio\" if ethertype isn't 802.1Q\n"); ++ if (!eth_type_vlan(eth_type)) { ++ fprintf(stderr, "Can't set \"vlan_prio\" if ethertype isn't 802.1Q or 802.1AD\n"); + return -1; + } + ret = get_u8(&vlan_prio, *argv, 10); +@@ -873,6 +881,42 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, + &vlan_ethtype, n); + if (ret < 0) + return -1; ++ } else if (matches(*argv, "cvlan_id") == 0) { ++ __u16 vid; ++ ++ NEXT_ARG(); ++ if (!eth_type_vlan(vlan_ethtype)) { ++ fprintf(stderr, "Can't set \"cvlan_id\" if inner vlan ethertype isn't 802.1Q or 802.1AD\n"); ++ return -1; ++ } ++ ret = get_u16(&vid, *argv, 10); ++ if (ret < 0 || vid & ~0xfff) { ++ fprintf(stderr, "Illegal \"cvlan_id\"\n"); ++ return -1; ++ } ++ addattr16(n, MAX_MSG, TCA_FLOWER_KEY_CVLAN_ID, vid); ++ } else if (matches(*argv, "cvlan_prio") == 0) { ++ __u8 cvlan_prio; ++ ++ NEXT_ARG(); ++ if (!eth_type_vlan(vlan_ethtype)) { ++ fprintf(stderr, "Can't set \"cvlan_prio\" if inner vlan ethertype isn't 802.1Q or 802.1AD\n"); ++ return -1; ++ } ++ ret = get_u8(&cvlan_prio, *argv, 10); ++ if 
(ret < 0 || cvlan_prio & ~0x7) { ++ fprintf(stderr, "Illegal \"cvlan_prio\"\n"); ++ return -1; ++ } ++ addattr8(n, MAX_MSG, ++ TCA_FLOWER_KEY_CVLAN_PRIO, cvlan_prio); ++ } else if (matches(*argv, "cvlan_ethtype") == 0) { ++ NEXT_ARG(); ++ ret = flower_parse_vlan_eth_type(*argv, vlan_ethtype, ++ TCA_FLOWER_KEY_CVLAN_ETH_TYPE, ++ &cvlan_ethtype, n); ++ if (ret < 0) ++ return -1; + } else if (matches(*argv, "mpls_label") == 0) { + __u32 label; + +@@ -959,7 +1003,8 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, + } + } else if (matches(*argv, "ip_proto") == 0) { + NEXT_ARG(); +- ret = flower_parse_ip_proto(*argv, vlan_ethtype ? ++ ret = flower_parse_ip_proto(*argv, cvlan_ethtype ? ++ cvlan_ethtype : vlan_ethtype ? + vlan_ethtype : eth_type, + TCA_FLOWER_KEY_IP_PROTO, + &ip_proto, n); +@@ -989,7 +1034,8 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, + } + } else if (matches(*argv, "dst_ip") == 0) { + NEXT_ARG(); +- ret = flower_parse_ip_addr(*argv, vlan_ethtype ? ++ ret = flower_parse_ip_addr(*argv, cvlan_ethtype ? ++ cvlan_ethtype : vlan_ethtype ? + vlan_ethtype : eth_type, + TCA_FLOWER_KEY_IPV4_DST, + TCA_FLOWER_KEY_IPV4_DST_MASK, +@@ -1002,7 +1048,8 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, + } + } else if (matches(*argv, "src_ip") == 0) { + NEXT_ARG(); +- ret = flower_parse_ip_addr(*argv, vlan_ethtype ? ++ ret = flower_parse_ip_addr(*argv, cvlan_ethtype ? ++ cvlan_ethtype : vlan_ethtype ? 
+ vlan_ethtype : eth_type, + TCA_FLOWER_KEY_IPV4_SRC, + TCA_FLOWER_KEY_IPV4_SRC_MASK, +@@ -1678,6 +1725,38 @@ static int flower_print_opt(struct filter_util *qu, FILE *f, + rta_getattr_u8(attr)); + } + ++ if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) { ++ SPRINT_BUF(buf); ++ struct rtattr *attr = tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]; ++ ++ print_string(PRINT_ANY, "vlan_ethtype", "\n vlan_ethtype %s", ++ ll_proto_n2a(rta_getattr_u16(attr), ++ buf, sizeof(buf))); ++ } ++ ++ if (tb[TCA_FLOWER_KEY_CVLAN_ID]) { ++ struct rtattr *attr = tb[TCA_FLOWER_KEY_CVLAN_ID]; ++ ++ print_uint(PRINT_ANY, "cvlan_id", "\n cvlan_id %u", ++ rta_getattr_u16(attr)); ++ } ++ ++ if (tb[TCA_FLOWER_KEY_CVLAN_PRIO]) { ++ struct rtattr *attr = tb[TCA_FLOWER_KEY_CVLAN_PRIO]; ++ ++ print_uint(PRINT_ANY, "cvlan_prio", "\n cvlan_prio %d", ++ rta_getattr_u8(attr)); ++ } ++ ++ if (tb[TCA_FLOWER_KEY_CVLAN_ETH_TYPE]) { ++ SPRINT_BUF(buf); ++ struct rtattr *attr = tb[TCA_FLOWER_KEY_CVLAN_ETH_TYPE]; ++ ++ print_string(PRINT_ANY, "cvlan_ethtype", "\n cvlan_ethtype %s", ++ ll_proto_n2a(rta_getattr_u16(attr), ++ buf, sizeof(buf))); ++ } ++ + flower_print_eth_addr("dst_mac", tb[TCA_FLOWER_KEY_ETH_DST], + tb[TCA_FLOWER_KEY_ETH_DST_MASK]); + flower_print_eth_addr("src_mac", tb[TCA_FLOWER_KEY_ETH_SRC], +-- +2.20.1 + diff --git a/SOURCES/0047-uapi-update-ib_verbs.patch b/SOURCES/0047-uapi-update-ib_verbs.patch new file mode 100644 index 0000000..36723cd --- /dev/null +++ b/SOURCES/0047-uapi-update-ib_verbs.patch @@ -0,0 +1,47 @@ +From bca44fb15b57a32a991780100171708e9e2e4960 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 29 May 2019 18:40:20 +0200 +Subject: [PATCH] uapi: update ib_verbs + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1664694 +Upstream Status: iproute2.git commit 27886a12416dd + +commit 27886a12416dd315cf386cfbfa93c2fb2aceca98 +Author: Stephen Hemminger +Date: Fri Aug 31 15:03:49 2018 -0700 + + uapi: update ib_verbs + + Merge current uapi from 4.19-rc1 + + Signed-off-by: Stephen Hemminger 
+--- + rdma/include/uapi/rdma/ib_user_verbs.h | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/rdma/include/uapi/rdma/ib_user_verbs.h b/rdma/include/uapi/rdma/ib_user_verbs.h +index 4f9991de8e3ad..25a16760de2ad 100644 +--- a/rdma/include/uapi/rdma/ib_user_verbs.h ++++ b/rdma/include/uapi/rdma/ib_user_verbs.h +@@ -279,7 +279,7 @@ struct ib_uverbs_query_port { + }; + + struct ib_uverbs_query_port_resp { +- __u32 port_cap_flags; ++ __u32 port_cap_flags; /* see ib_uverbs_query_port_cap_flags */ + __u32 max_msg_sz; + __u32 bad_pkey_cntr; + __u32 qkey_viol_cntr; +@@ -299,7 +299,8 @@ struct ib_uverbs_query_port_resp { + __u8 active_speed; + __u8 phys_state; + __u8 link_layer; +- __u8 reserved[2]; ++ __u8 flags; /* see ib_uverbs_query_port_flags */ ++ __u8 reserved; + }; + + struct ib_uverbs_alloc_pd { +-- +2.20.1 + diff --git a/SOURCES/0048-rdma-Fix-representation-of-PortInfo-CapabilityMask.patch b/SOURCES/0048-rdma-Fix-representation-of-PortInfo-CapabilityMask.patch new file mode 100644 index 0000000..159e26b --- /dev/null +++ b/SOURCES/0048-rdma-Fix-representation-of-PortInfo-CapabilityMask.patch @@ -0,0 +1,80 @@ +From e67f089156708052abeb9c67d77cb0cf966d89c6 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 29 May 2019 18:40:20 +0200 +Subject: [PATCH] rdma: Fix representation of PortInfo CapabilityMask + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1664694 +Upstream Status: iproute2.git commit d090fbf33bd15 + +commit d090fbf33bd15d82978596920b07616aa119ac2f +Author: Leon Romanovsky +Date: Sun Sep 16 20:28:13 2018 +0300 + + rdma: Fix representation of PortInfo CapabilityMask + + The port capability mask represents IBTA PortInfo specification, + but as it is written in description of kernel commit 2f944c0fbf58 + ("RDMA: Fix storage of PortInfo CapabilityMask in the kernel"), + the bit 26 was mistakenly overwritten. + + The rdmatool followed it too and mislead users by presenting wrong + value. 
Since it never showed proper value, we update the whole + port_cap_mask to comply with IBTA and show real HW values. + + Fixes: da990ab40a92 ("rdma: Add link object") + Signed-off-by: Leon Romanovsky + Signed-off-by: Stephen Hemminger +--- + rdma/link.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/rdma/link.c b/rdma/link.c +index 7e914c870994e..7a6d4b7e356d7 100644 +--- a/rdma/link.c ++++ b/rdma/link.c +@@ -20,6 +20,7 @@ static int link_help(struct rd *rd) + static const char *caps_to_str(uint32_t idx) + { + #define RDMA_PORT_FLAGS(x) \ ++ x(RESERVED, 0) \ + x(SM, 1) \ + x(NOTICE, 2) \ + x(TRAP, 3) \ +@@ -32,7 +33,9 @@ static const char *caps_to_str(uint32_t idx) + x(SM_DISABLED, 10) \ + x(SYS_IMAGE_GUID, 11) \ + x(PKEY_SW_EXT_PORT_TRAP, 12) \ ++ x(CABLE_INFO, 13) \ + x(EXTENDED_SPEEDS, 14) \ ++ x(CAP_MASK2, 15) \ + x(CM, 16) \ + x(SNMP_TUNNEL, 17) \ + x(REINIT, 18) \ +@@ -43,7 +46,12 @@ static const char *caps_to_str(uint32_t idx) + x(BOOT_MGMT, 23) \ + x(LINK_LATENCY, 24) \ + x(CLIENT_REG, 25) \ +- x(IP_BASED_GIDS, 26) ++ x(OTHER_LOCAL_CHANGES, 26) \ ++ x(LINK_SPPED_WIDTH, 27) \ ++ x(VENDOR_SPECIFIC_MADS, 28) \ ++ x(MULT_PKER_TRAP, 29) \ ++ x(MULT_FDB, 30) \ ++ x(HIERARCHY_INFO, 31) + + enum { RDMA_PORT_FLAGS(RDMA_BITMAP_ENUM) }; + +@@ -51,9 +59,7 @@ static const char *caps_to_str(uint32_t idx) + rdma_port_names[] = { RDMA_PORT_FLAGS(RDMA_BITMAP_NAMES) }; + #undef RDMA_PORT_FLAGS + +- if (idx < ARRAY_SIZE(rdma_port_names) && rdma_port_names[idx]) +- return rdma_port_names[idx]; +- return "UNKNOWN"; ++ return rdma_port_names[idx]; + } + + static void link_print_caps(struct rd *rd, struct nlattr **tb) +-- +2.20.1 + diff --git a/SOURCES/0049-devlink-Add-param-command-support.patch b/SOURCES/0049-devlink-Add-param-command-support.patch new file mode 100644 index 0000000..f512773 --- /dev/null +++ b/SOURCES/0049-devlink-Add-param-command-support.patch @@ -0,0 +1,702 @@ +From a126f6cc4f4d8f5f58758d673fbdb80894c5f05d Mon Sep 17 
00:00:00 2001 +From: Andrea Claudi +Date: Wed, 29 May 2019 18:55:33 +0200 +Subject: [PATCH] devlink: Add param command support + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1663199 +Upstream Status: iproute2.git commit 13925ae9eb38b + +commit 13925ae9eb38b99107be1d3fe21a1b73cf40bd97 +Author: Moshe Shemesh +Date: Wed Jul 4 17:12:06 2018 +0300 + + devlink: Add param command support + + Add support for configuration parameters set and show. + Each parameter can be either generic or driver-specific. + The user can retrieve data on these configuration parameters by devlink + param show command and can set new value to a configuration parameter + by devlink param set command. + The configuration parameters can be set in different configuration + modes: + runtime - set while driver is running, no reset required. + driverinit - applied while driver initializes, requires restart + driver by devlink reload command. + permanent - written to device's non-volatile memory, hard reset + required to apply. + + New commands added: + devlink dev param show [DEV name PARAMETER] + devlink dev param set DEV name PARAMETER value VALUE + cmode { permanent | driverinit | runtime } + + Signed-off-by: Moshe Shemesh + Signed-off-by: Jiri Pirko + Signed-off-by: David Ahern +--- + devlink/devlink.c | 454 +++++++++++++++++++++++++++++++++++++++++ + man/man8/devlink-dev.8 | 57 ++++++ + 2 files changed, 511 insertions(+) + +diff --git a/devlink/devlink.c b/devlink/devlink.c +index 7a5aef84f25dc..00a514f8ff666 100644 +--- a/devlink/devlink.c ++++ b/devlink/devlink.c +@@ -35,6 +35,10 @@ + #define ESWITCH_INLINE_MODE_NETWORK "network" + #define ESWITCH_INLINE_MODE_TRANSPORT "transport" + ++#define PARAM_CMODE_RUNTIME_STR "runtime" ++#define PARAM_CMODE_DRIVERINIT_STR "driverinit" ++#define PARAM_CMODE_PERMANENT_STR "permanent" ++ + static int g_new_line_count; + + #define pr_err(args...) 
fprintf(stderr, ##args) +@@ -187,6 +191,9 @@ static void ifname_map_free(struct ifname_map *ifname_map) + #define DL_OPT_ESWITCH_ENCAP_MODE BIT(15) + #define DL_OPT_RESOURCE_PATH BIT(16) + #define DL_OPT_RESOURCE_SIZE BIT(17) ++#define DL_OPT_PARAM_NAME BIT(18) ++#define DL_OPT_PARAM_VALUE BIT(19) ++#define DL_OPT_PARAM_CMODE BIT(20) + + struct dl_opts { + uint32_t present; /* flags of present items */ +@@ -211,6 +218,9 @@ struct dl_opts { + uint32_t resource_size; + uint32_t resource_id; + bool resource_id_valid; ++ const char *param_name; ++ const char *param_value; ++ enum devlink_param_cmode cmode; + }; + + struct dl { +@@ -348,6 +358,12 @@ static const enum mnl_attr_data_type devlink_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_FIELD_ID] = MNL_TYPE_U32, + [DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH] = MNL_TYPE_U32, + [DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE] = MNL_TYPE_U32, ++ [DEVLINK_ATTR_PARAM] = MNL_TYPE_NESTED, ++ [DEVLINK_ATTR_PARAM_NAME] = MNL_TYPE_STRING, ++ [DEVLINK_ATTR_PARAM_TYPE] = MNL_TYPE_U8, ++ [DEVLINK_ATTR_PARAM_VALUES_LIST] = MNL_TYPE_NESTED, ++ [DEVLINK_ATTR_PARAM_VALUE] = MNL_TYPE_NESTED, ++ [DEVLINK_ATTR_PARAM_VALUE_CMODE] = MNL_TYPE_U8, + }; + + static int attr_cb(const struct nlattr *attr, void *data) +@@ -514,6 +530,34 @@ static int strtouint16_t(const char *str, uint16_t *p_val) + return 0; + } + ++static int strtouint8_t(const char *str, uint8_t *p_val) ++{ ++ char *endptr; ++ unsigned long int val; ++ ++ val = strtoul(str, &endptr, 10); ++ if (endptr == str || *endptr != '\0') ++ return -EINVAL; ++ if (val > UCHAR_MAX) ++ return -ERANGE; ++ *p_val = val; ++ return 0; ++} ++ ++static int strtobool(const char *str, bool *p_val) ++{ ++ bool val; ++ ++ if (!strcmp(str, "true") || !strcmp(str, "1")) ++ val = true; ++ else if (!strcmp(str, "false") || !strcmp(str, "0")) ++ val = false; ++ else ++ return -EINVAL; ++ *p_val = val; ++ return 0; ++} ++ + static int __dl_argv_handle(char *str, char **p_bus_name, char **p_dev_name) + { + 
strslashrsplit(str, p_bus_name, p_dev_name); +@@ -792,6 +836,22 @@ static int eswitch_encap_mode_get(const char *typestr, bool *p_mode) + return 0; + } + ++static int param_cmode_get(const char *cmodestr, ++ enum devlink_param_cmode *cmode) ++{ ++ if (strcmp(cmodestr, PARAM_CMODE_RUNTIME_STR) == 0) { ++ *cmode = DEVLINK_PARAM_CMODE_RUNTIME; ++ } else if (strcmp(cmodestr, PARAM_CMODE_DRIVERINIT_STR) == 0) { ++ *cmode = DEVLINK_PARAM_CMODE_DRIVERINIT; ++ } else if (strcmp(cmodestr, PARAM_CMODE_PERMANENT_STR) == 0) { ++ *cmode = DEVLINK_PARAM_CMODE_PERMANENT; ++ } else { ++ pr_err("Unknown configuration mode \"%s\"\n", cmodestr); ++ return -EINVAL; ++ } ++ return 0; ++} ++ + static int dl_argv_parse(struct dl *dl, uint32_t o_required, + uint32_t o_optional) + { +@@ -973,6 +1033,32 @@ static int dl_argv_parse(struct dl *dl, uint32_t o_required, + if (err) + return err; + o_found |= DL_OPT_RESOURCE_SIZE; ++ } else if (dl_argv_match(dl, "name") && ++ (o_all & DL_OPT_PARAM_NAME)) { ++ dl_arg_inc(dl); ++ err = dl_argv_str(dl, &opts->param_name); ++ if (err) ++ return err; ++ o_found |= DL_OPT_PARAM_NAME; ++ } else if (dl_argv_match(dl, "value") && ++ (o_all & DL_OPT_PARAM_VALUE)) { ++ dl_arg_inc(dl); ++ err = dl_argv_str(dl, &opts->param_value); ++ if (err) ++ return err; ++ o_found |= DL_OPT_PARAM_VALUE; ++ } else if (dl_argv_match(dl, "cmode") && ++ (o_all & DL_OPT_PARAM_CMODE)) { ++ const char *cmodestr; ++ ++ dl_arg_inc(dl); ++ err = dl_argv_str(dl, &cmodestr); ++ if (err) ++ return err; ++ err = param_cmode_get(cmodestr, &opts->cmode); ++ if (err) ++ return err; ++ o_found |= DL_OPT_PARAM_CMODE; + } else { + pr_err("Unknown option \"%s\"\n", dl_argv(dl)); + return -EINVAL; +@@ -1057,6 +1143,24 @@ static int dl_argv_parse(struct dl *dl, uint32_t o_required, + return -EINVAL; + } + ++ if ((o_required & DL_OPT_PARAM_NAME) && ++ !(o_found & DL_OPT_PARAM_NAME)) { ++ pr_err("Parameter name expected.\n"); ++ return -EINVAL; ++ } ++ ++ if ((o_required & DL_OPT_PARAM_VALUE) && 
++ !(o_found & DL_OPT_PARAM_VALUE)) { ++ pr_err("Value to set expected.\n"); ++ return -EINVAL; ++ } ++ ++ if ((o_required & DL_OPT_PARAM_CMODE) && ++ !(o_found & DL_OPT_PARAM_CMODE)) { ++ pr_err("Configuration mode expected.\n"); ++ return -EINVAL; ++ } ++ + return 0; + } + +@@ -1121,6 +1225,12 @@ static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl) + if (opts->present & DL_OPT_RESOURCE_SIZE) + mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_SIZE, + opts->resource_size); ++ if (opts->present & DL_OPT_PARAM_NAME) ++ mnl_attr_put_strz(nlh, DEVLINK_ATTR_PARAM_NAME, ++ opts->param_name); ++ if (opts->present & DL_OPT_PARAM_CMODE) ++ mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, ++ opts->cmode); + } + + static int dl_argv_parse_put(struct nlmsghdr *nlh, struct dl *dl, +@@ -1179,6 +1289,8 @@ static void cmd_dev_help(void) + pr_err(" [ inline-mode { none | link | network | transport } ]\n"); + pr_err(" [ encap { disable | enable } ]\n"); + pr_err(" devlink dev eswitch show DEV\n"); ++ pr_err(" devlink dev param set DEV name PARAMETER value VALUE cmode { permanent | driverinit | runtime }\n"); ++ pr_err(" devlink dev param show [DEV name PARAMETER]\n"); + pr_err(" devlink dev reload DEV\n"); + } + +@@ -1393,6 +1505,14 @@ static void pr_out_str(struct dl *dl, const char *name, const char *val) + } + } + ++static void pr_out_bool(struct dl *dl, const char *name, bool val) ++{ ++ if (val) ++ pr_out_str(dl, name, "true"); ++ else ++ pr_out_str(dl, name, "false"); ++} ++ + static void pr_out_uint(struct dl *dl, const char *name, unsigned int val) + { + if (dl->json_output) { +@@ -1475,6 +1595,19 @@ static void pr_out_entry_end(struct dl *dl) + __pr_out_newline(); + } + ++static const char *param_cmode_name(uint8_t cmode) ++{ ++ switch (cmode) { ++ case DEVLINK_PARAM_CMODE_RUNTIME: ++ return PARAM_CMODE_RUNTIME_STR; ++ case DEVLINK_PARAM_CMODE_DRIVERINIT: ++ return PARAM_CMODE_DRIVERINIT_STR; ++ case DEVLINK_PARAM_CMODE_PERMANENT: ++ return PARAM_CMODE_PERMANENT_STR; 
++ default: return ""; ++ } ++} ++ + static const char *eswitch_mode_name(uint32_t mode) + { + switch (mode) { +@@ -1593,6 +1726,304 @@ static int cmd_dev_eswitch(struct dl *dl) + return -ENOENT; + } + ++static void pr_out_param_value(struct dl *dl, int nla_type, struct nlattr *nl) ++{ ++ struct nlattr *nla_value[DEVLINK_ATTR_MAX + 1] = {}; ++ struct nlattr *val_attr; ++ int err; ++ ++ err = mnl_attr_parse_nested(nl, attr_cb, nla_value); ++ if (err != MNL_CB_OK) ++ return; ++ ++ if (!nla_value[DEVLINK_ATTR_PARAM_VALUE_CMODE] || ++ (nla_type != MNL_TYPE_FLAG && ++ !nla_value[DEVLINK_ATTR_PARAM_VALUE_DATA])) ++ return; ++ ++ pr_out_str(dl, "cmode", ++ param_cmode_name(mnl_attr_get_u8(nla_value[DEVLINK_ATTR_PARAM_VALUE_CMODE]))); ++ val_attr = nla_value[DEVLINK_ATTR_PARAM_VALUE_DATA]; ++ ++ switch (nla_type) { ++ case MNL_TYPE_U8: ++ pr_out_uint(dl, "value", mnl_attr_get_u8(val_attr)); ++ break; ++ case MNL_TYPE_U16: ++ pr_out_uint(dl, "value", mnl_attr_get_u16(val_attr)); ++ break; ++ case MNL_TYPE_U32: ++ pr_out_uint(dl, "value", mnl_attr_get_u32(val_attr)); ++ break; ++ case MNL_TYPE_STRING: ++ pr_out_str(dl, "value", mnl_attr_get_str(val_attr)); ++ break; ++ case MNL_TYPE_FLAG: ++ pr_out_bool(dl, "value", val_attr ? 
true : false); ++ break; ++ } ++} ++ ++static void pr_out_param(struct dl *dl, struct nlattr **tb, bool array) ++{ ++ struct nlattr *nla_param[DEVLINK_ATTR_MAX + 1] = {}; ++ struct nlattr *param_value_attr; ++ int nla_type; ++ int err; ++ ++ err = mnl_attr_parse_nested(tb[DEVLINK_ATTR_PARAM], attr_cb, nla_param); ++ if (err != MNL_CB_OK) ++ return; ++ if (!nla_param[DEVLINK_ATTR_PARAM_NAME] || ++ !nla_param[DEVLINK_ATTR_PARAM_TYPE] || ++ !nla_param[DEVLINK_ATTR_PARAM_VALUES_LIST]) ++ return; ++ ++ if (array) ++ pr_out_handle_start_arr(dl, tb); ++ else ++ __pr_out_handle_start(dl, tb, true, false); ++ ++ nla_type = mnl_attr_get_u8(nla_param[DEVLINK_ATTR_PARAM_TYPE]); ++ ++ pr_out_str(dl, "name", ++ mnl_attr_get_str(nla_param[DEVLINK_ATTR_PARAM_NAME])); ++ ++ if (!nla_param[DEVLINK_ATTR_PARAM_GENERIC]) ++ pr_out_str(dl, "type", "driver-specific"); ++ else ++ pr_out_str(dl, "type", "generic"); ++ ++ pr_out_array_start(dl, "values"); ++ mnl_attr_for_each_nested(param_value_attr, ++ nla_param[DEVLINK_ATTR_PARAM_VALUES_LIST]) { ++ pr_out_entry_start(dl); ++ pr_out_param_value(dl, nla_type, param_value_attr); ++ pr_out_entry_end(dl); ++ } ++ pr_out_array_end(dl); ++ pr_out_handle_end(dl); ++} ++ ++static int cmd_dev_param_show_cb(const struct nlmsghdr *nlh, void *data) ++{ ++ struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); ++ struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; ++ struct dl *dl = data; ++ ++ mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); ++ if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || ++ !tb[DEVLINK_ATTR_PARAM]) ++ return MNL_CB_ERROR; ++ pr_out_param(dl, tb, true); ++ return MNL_CB_OK; ++} ++ ++struct param_ctx { ++ struct dl *dl; ++ int nla_type; ++ union { ++ uint8_t vu8; ++ uint16_t vu16; ++ uint32_t vu32; ++ const char *vstr; ++ bool vbool; ++ } value; ++}; ++ ++static int cmd_dev_param_set_cb(const struct nlmsghdr *nlh, void *data) ++{ ++ struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); ++ struct nlattr 
*nla_param[DEVLINK_ATTR_MAX + 1] = {}; ++ struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; ++ struct nlattr *param_value_attr; ++ enum devlink_param_cmode cmode; ++ struct param_ctx *ctx = data; ++ struct dl *dl = ctx->dl; ++ int nla_type; ++ int err; ++ ++ mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); ++ if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || ++ !tb[DEVLINK_ATTR_PARAM]) ++ return MNL_CB_ERROR; ++ ++ err = mnl_attr_parse_nested(tb[DEVLINK_ATTR_PARAM], attr_cb, nla_param); ++ if (err != MNL_CB_OK) ++ return MNL_CB_ERROR; ++ ++ if (!nla_param[DEVLINK_ATTR_PARAM_TYPE] || ++ !nla_param[DEVLINK_ATTR_PARAM_VALUES_LIST]) ++ return MNL_CB_ERROR; ++ ++ nla_type = mnl_attr_get_u8(nla_param[DEVLINK_ATTR_PARAM_TYPE]); ++ mnl_attr_for_each_nested(param_value_attr, ++ nla_param[DEVLINK_ATTR_PARAM_VALUES_LIST]) { ++ struct nlattr *nla_value[DEVLINK_ATTR_MAX + 1] = {}; ++ struct nlattr *val_attr; ++ ++ err = mnl_attr_parse_nested(param_value_attr, ++ attr_cb, nla_value); ++ if (err != MNL_CB_OK) ++ return MNL_CB_ERROR; ++ ++ if (!nla_value[DEVLINK_ATTR_PARAM_VALUE_CMODE] || ++ (nla_type != MNL_TYPE_FLAG && ++ !nla_value[DEVLINK_ATTR_PARAM_VALUE_DATA])) ++ return MNL_CB_ERROR; ++ ++ cmode = mnl_attr_get_u8(nla_value[DEVLINK_ATTR_PARAM_VALUE_CMODE]); ++ if (cmode == dl->opts.cmode) { ++ val_attr = nla_value[DEVLINK_ATTR_PARAM_VALUE_DATA]; ++ switch (nla_type) { ++ case MNL_TYPE_U8: ++ ctx->value.vu8 = mnl_attr_get_u8(val_attr); ++ break; ++ case MNL_TYPE_U16: ++ ctx->value.vu16 = mnl_attr_get_u16(val_attr); ++ break; ++ case MNL_TYPE_U32: ++ ctx->value.vu32 = mnl_attr_get_u32(val_attr); ++ break; ++ case MNL_TYPE_STRING: ++ ctx->value.vstr = mnl_attr_get_str(val_attr); ++ break; ++ case MNL_TYPE_FLAG: ++ ctx->value.vbool = val_attr ? 
true : false; ++ break; ++ } ++ break; ++ } ++ } ++ ctx->nla_type = nla_type; ++ return MNL_CB_OK; ++} ++ ++static int cmd_dev_param_set(struct dl *dl) ++{ ++ struct param_ctx ctx = {}; ++ struct nlmsghdr *nlh; ++ uint32_t val_u32; ++ uint16_t val_u16; ++ uint8_t val_u8; ++ bool val_bool; ++ int err; ++ ++ err = dl_argv_parse(dl, DL_OPT_HANDLE | ++ DL_OPT_PARAM_NAME | ++ DL_OPT_PARAM_VALUE | ++ DL_OPT_PARAM_CMODE, 0); ++ if (err) ++ return err; ++ ++ /* Get value type */ ++ nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PARAM_GET, ++ NLM_F_REQUEST | NLM_F_ACK); ++ dl_opts_put(nlh, dl); ++ ++ ctx.dl = dl; ++ err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dev_param_set_cb, &ctx); ++ if (err) ++ return err; ++ ++ nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PARAM_SET, ++ NLM_F_REQUEST | NLM_F_ACK); ++ dl_opts_put(nlh, dl); ++ ++ mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_TYPE, ctx.nla_type); ++ switch (ctx.nla_type) { ++ case MNL_TYPE_U8: ++ err = strtouint8_t(dl->opts.param_value, &val_u8); ++ if (err) ++ goto err_param_value_parse; ++ if (val_u8 == ctx.value.vu8) ++ return 0; ++ mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, val_u8); ++ break; ++ case MNL_TYPE_U16: ++ err = strtouint16_t(dl->opts.param_value, &val_u16); ++ if (err) ++ goto err_param_value_parse; ++ if (val_u16 == ctx.value.vu16) ++ return 0; ++ mnl_attr_put_u16(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, val_u16); ++ break; ++ case MNL_TYPE_U32: ++ err = strtouint32_t(dl->opts.param_value, &val_u32); ++ if (err) ++ goto err_param_value_parse; ++ if (val_u32 == ctx.value.vu32) ++ return 0; ++ mnl_attr_put_u32(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, val_u32); ++ break; ++ case MNL_TYPE_FLAG: ++ err = strtobool(dl->opts.param_value, &val_bool); ++ if (err) ++ goto err_param_value_parse; ++ if (val_bool == ctx.value.vbool) ++ return 0; ++ if (val_bool) ++ mnl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, ++ 0, NULL); ++ break; ++ case MNL_TYPE_STRING: ++ mnl_attr_put_strz(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, ++ 
dl->opts.param_value); ++ if (!strcmp(dl->opts.param_value, ctx.value.vstr)) ++ return 0; ++ break; ++ default: ++ printf("Value type not supported\n"); ++ return -ENOTSUP; ++ } ++ return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); ++ ++err_param_value_parse: ++ pr_err("Value \"%s\" is not a number or not within range\n", ++ dl->opts.param_value); ++ return err; ++} ++ ++static int cmd_dev_param_show(struct dl *dl) ++{ ++ uint16_t flags = NLM_F_REQUEST | NLM_F_ACK; ++ struct nlmsghdr *nlh; ++ int err; ++ ++ if (dl_argc(dl) == 0) ++ flags |= NLM_F_DUMP; ++ ++ nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PARAM_GET, flags); ++ ++ if (dl_argc(dl) > 0) { ++ err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE | ++ DL_OPT_PARAM_NAME, 0); ++ if (err) ++ return err; ++ } ++ ++ pr_out_section_start(dl, "param"); ++ err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dev_param_show_cb, dl); ++ pr_out_section_end(dl); ++ return err; ++} ++ ++static int cmd_dev_param(struct dl *dl) ++{ ++ if (dl_argv_match(dl, "help")) { ++ cmd_dev_help(); ++ return 0; ++ } else if (dl_argv_match(dl, "show") || ++ dl_argv_match(dl, "list") || dl_no_arg(dl)) { ++ dl_arg_inc(dl); ++ return cmd_dev_param_show(dl); ++ } else if (dl_argv_match(dl, "set")) { ++ dl_arg_inc(dl); ++ return cmd_dev_param_set(dl); ++ } ++ pr_err("Command \"%s\" not found\n", dl_argv(dl)); ++ return -ENOENT; ++} + static int cmd_dev_show_cb(const struct nlmsghdr *nlh, void *data) + { + struct dl *dl = data; +@@ -1669,6 +2100,9 @@ static int cmd_dev(struct dl *dl) + } else if (dl_argv_match(dl, "reload")) { + dl_arg_inc(dl); + return cmd_dev_reload(dl); ++ } else if (dl_argv_match(dl, "param")) { ++ dl_arg_inc(dl); ++ return cmd_dev_param(dl); + } + pr_err("Command \"%s\" not found\n", dl_argv(dl)); + return -ENOENT; +@@ -2632,6 +3066,10 @@ static const char *cmd_name(uint8_t cmd) + case DEVLINK_CMD_PORT_SET: return "set"; + case DEVLINK_CMD_PORT_NEW: return "new"; + case DEVLINK_CMD_PORT_DEL: return "del"; ++ case 
DEVLINK_CMD_PARAM_GET: return "get"; ++ case DEVLINK_CMD_PARAM_SET: return "set"; ++ case DEVLINK_CMD_PARAM_NEW: return "new"; ++ case DEVLINK_CMD_PARAM_DEL: return "del"; + default: return ""; + } + } +@@ -2650,6 +3088,11 @@ static const char *cmd_obj(uint8_t cmd) + case DEVLINK_CMD_PORT_NEW: + case DEVLINK_CMD_PORT_DEL: + return "port"; ++ case DEVLINK_CMD_PARAM_GET: ++ case DEVLINK_CMD_PARAM_SET: ++ case DEVLINK_CMD_PARAM_NEW: ++ case DEVLINK_CMD_PARAM_DEL: ++ return "param"; + default: return ""; + } + } +@@ -2706,6 +3149,17 @@ static int cmd_mon_show_cb(const struct nlmsghdr *nlh, void *data) + pr_out_mon_header(genl->cmd); + pr_out_port(dl, tb); + break; ++ case DEVLINK_CMD_PARAM_GET: /* fall through */ ++ case DEVLINK_CMD_PARAM_SET: /* fall through */ ++ case DEVLINK_CMD_PARAM_NEW: /* fall through */ ++ case DEVLINK_CMD_PARAM_DEL: ++ mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); ++ if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || ++ !tb[DEVLINK_ATTR_PARAM]) ++ return MNL_CB_ERROR; ++ pr_out_mon_header(genl->cmd); ++ pr_out_param(dl, tb, false); ++ break; + } + return MNL_CB_OK; + } +diff --git a/man/man8/devlink-dev.8 b/man/man8/devlink-dev.8 +index 7c749ddabaeeb..d985da172aa05 100644 +--- a/man/man8/devlink-dev.8 ++++ b/man/man8/devlink-dev.8 +@@ -42,6 +42,23 @@ devlink-dev \- devlink device configuration + .BR "devlink dev eswitch show" + .IR DEV + ++.ti -8 ++.BR "devlink dev param set" ++.IR DEV ++.BR name ++.IR PARAMETER ++.BR value ++.IR VALUE ++.BR cmode " { " runtime " | " driverinit " | " permanent " } " ++ ++.ti -8 ++.BR "devlink dev param show" ++.RI "[ " ++.IR DEV ++.BR name ++.IR PARAMETER ++.RI "]" ++ + .ti -8 + .BR "devlink dev reload" + .IR DEV +@@ -98,6 +115,36 @@ Set eswitch encapsulation support + .I enable + - Enable encapsulation support + ++.SS devlink dev param set - set new value to devlink device configuration parameter ++ ++.TP ++.BI name " PARAMETER" ++Specify parameter name to set. 
++ ++.TP ++.BI value " VALUE" ++New value to set. ++ ++.TP ++.BR cmode " { " runtime " | " driverinit " | " permanent " } " ++Configuration mode in which the new value is set. ++ ++.I runtime ++- Set new value while driver is running. This configuration mode doesn't require any reset to apply the new value. ++ ++.I driverinit ++- Set new value which will be applied during driver initialization. This configuration mode requires restart driver by devlink reload command to apply the new value. ++ ++.I permanent ++- New value is written to device's non-volatile memory. This configuration mode requires hard reset to apply the new value. ++ ++.SS devlink dev param show - display devlink device supported configuration parameters attributes ++ ++.BR name ++.IR PARAMETER ++Specify parameter name to show. ++If this argument is omitted all parameters supported by devlink devices are listed. ++ + .SS devlink dev reload - perform hot reload of the driver. + + .PP +@@ -126,6 +173,16 @@ devlink dev eswitch set pci/0000:01:00.0 mode switchdev + Sets the eswitch mode of specified devlink device to switchdev. + .RE + .PP ++devlink dev param show pci/0000:01:00.0 name max_macs ++.RS 4 ++Shows the parameter max_macs attributes. ++.RE ++.PP ++devlink dev param set pci/0000:01:00.0 name internal_error_reset value true cmode runtime ++.RS 4 ++Sets the parameter internal_error_reset of specified devlink device to true. ++.RE ++.PP + devlink dev reload pci/0000:01:00.0 + .RS 4 + Performs hot reload of specified devlink device. 
+-- +2.20.1 + diff --git a/SOURCES/0050-libnetlink-Convert-GETADDR-dumps-to-use-rtnl_addrdum.patch b/SOURCES/0050-libnetlink-Convert-GETADDR-dumps-to-use-rtnl_addrdum.patch new file mode 100644 index 0000000..340a376 --- /dev/null +++ b/SOURCES/0050-libnetlink-Convert-GETADDR-dumps-to-use-rtnl_addrdum.patch @@ -0,0 +1,100 @@ +From ae7cf70848d837d2ed85a21f6d7cbdf4c6ee6e4e Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 10 Jun 2019 14:25:21 +0200 +Subject: [PATCH] libnetlink: Convert GETADDR dumps to use rtnl_addrdump_req + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716772 +Upstream Status: iproute2.git commit 46917d0895fb0 + +commit 46917d0895fb0fb1df9b3c1575ccd467b4a1f860 +Author: David Ahern +Date: Sat Sep 29 08:41:46 2018 -0700 + + libnetlink: Convert GETADDR dumps to use rtnl_addrdump_req + + Add rtnl_addrdump_req for address dumps using the proper ifaddrmsg + as the header. Convert existing RTM_GETADDR dumps to use it. + + Signed-off-by: David Ahern +--- + include/libnetlink.h | 4 ++++ + ip/ipaddress.c | 6 +++--- + lib/libnetlink.c | 16 ++++++++++++++++ + 3 files changed, 23 insertions(+), 3 deletions(-) + +diff --git a/include/libnetlink.h b/include/libnetlink.h +index 9d9249e634dc3..2d9f6190230c1 100644 +--- a/include/libnetlink.h ++++ b/include/libnetlink.h +@@ -46,6 +46,10 @@ int rtnl_open_byproto(struct rtnl_handle *rth, unsigned int subscriptions, + __attribute__((warn_unused_result)); + + void rtnl_close(struct rtnl_handle *rth); ++ ++int rtnl_addrdump_req(struct rtnl_handle *rth, int family) ++ __attribute__((warn_unused_result)); ++ + int rtnl_wilddump_request(struct rtnl_handle *rth, int fam, int type) + __attribute__((warn_unused_result)); + int rtnl_wilddump_req_filter(struct rtnl_handle *rth, int fam, int type, +diff --git a/ip/ipaddress.c b/ip/ipaddress.c +index f315a815e945d..4714bce60db1b 100644 +--- a/ip/ipaddress.c ++++ b/ip/ipaddress.c +@@ -1672,7 +1672,7 @@ static int ipaddr_flush(void) + filter.flushe = 
sizeof(flushb); + + while ((max_flush_loops == 0) || (round < max_flush_loops)) { +- if (rtnl_wilddump_request(&rth, filter.family, RTM_GETADDR) < 0) { ++ if (rtnl_addrdump_req(&rth, filter.family) < 0) { + perror("Cannot send dump request"); + exit(1); + } +@@ -1764,7 +1764,7 @@ int ip_linkaddr_list(int family, req_filter_fn_t filter_fn, + } + + if (ainfo) { +- if (rtnl_wilddump_request(&rth, family, RTM_GETADDR) < 0) { ++ if (rtnl_addrdump_req(&rth, family) < 0) { + perror("Cannot send dump request"); + return 1; + } +@@ -1889,7 +1889,7 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action) + if (ipadd_save_prep()) + exit(1); + +- if (rtnl_wilddump_request(&rth, preferred_family, RTM_GETADDR) < 0) { ++ if (rtnl_addrdump_req(&rth, preferred_family) < 0) { + perror("Cannot send dump request"); + exit(1); + } +diff --git a/lib/libnetlink.c b/lib/libnetlink.c +index a9932d423126e..db625b9bd18ca 100644 +--- a/lib/libnetlink.c ++++ b/lib/libnetlink.c +@@ -199,6 +199,22 @@ int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions) + return rtnl_open_byproto(rth, subscriptions, NETLINK_ROUTE); + } + ++int rtnl_addrdump_req(struct rtnl_handle *rth, int family) ++{ ++ struct { ++ struct nlmsghdr nlh; ++ struct ifaddrmsg ifm; ++ } req = { ++ .nlh.nlmsg_len = sizeof(req), ++ .nlh.nlmsg_type = RTM_GETADDR, ++ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, ++ .nlh.nlmsg_seq = rth->dump = ++rth->seq, ++ .ifm.ifa_family = family, ++ }; ++ ++ return send(rth->fd, &req, sizeof(req), 0); ++} ++ + int rtnl_wilddump_request(struct rtnl_handle *rth, int family, int type) + { + return rtnl_wilddump_req_filter(rth, family, type, RTEXT_FILTER_VF); +-- +2.20.1 + diff --git a/SOURCES/0051-rdma-Update-kernel-include-file-to-support-IB-device.patch b/SOURCES/0051-rdma-Update-kernel-include-file-to-support-IB-device.patch new file mode 100644 index 0000000..bebf939 --- /dev/null +++ b/SOURCES/0051-rdma-Update-kernel-include-file-to-support-IB-device.patch @@ -0,0 
+1,43 @@ +From 0a919849a14e65e4ff4b2c71f02862598c79633b Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 10 Jun 2019 15:32:55 +0200 +Subject: [PATCH] rdma: Update kernel include file to support IB device + renaming + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1663228 +Upstream Status: iproute2.git commit 3fb00075d9043 + +commit 3fb00075d904389afce507fffe06ca3a8500ebf3 +Author: Leon Romanovsky +Date: Wed Oct 31 09:17:55 2018 +0200 + + rdma: Update kernel include file to support IB device renaming + + Bring kernel header file changes upto commit 05d940d3a3ec + ("RDMA/nldev: Allow IB device rename through RDMA netlink") + + Signed-off-by: Leon Romanovsky + Reviewed-by: Steve Wise + Signed-off-by: David Ahern +--- + rdma/include/uapi/rdma/rdma_netlink.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/rdma/include/uapi/rdma/rdma_netlink.h b/rdma/include/uapi/rdma/rdma_netlink.h +index 6513fb89abfa1..e2228c0917154 100644 +--- a/rdma/include/uapi/rdma/rdma_netlink.h ++++ b/rdma/include/uapi/rdma/rdma_netlink.h +@@ -227,8 +227,9 @@ enum rdma_nldev_command { + RDMA_NLDEV_CMD_UNSPEC, + + RDMA_NLDEV_CMD_GET, /* can dump */ ++ RDMA_NLDEV_CMD_SET, + +- /* 2 - 4 are free to use */ ++ /* 3 - 4 are free to use */ + + RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */ + +-- +2.20.1 + diff --git a/SOURCES/0052-rdma-Introduce-command-execution-helper-with-require.patch b/SOURCES/0052-rdma-Introduce-command-execution-helper-with-require.patch new file mode 100644 index 0000000..68919fb --- /dev/null +++ b/SOURCES/0052-rdma-Introduce-command-execution-helper-with-require.patch @@ -0,0 +1,63 @@ +From 349c43f99f876e0663fb0b00396ac3d387bc32e9 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 10 Jun 2019 15:32:55 +0200 +Subject: [PATCH] rdma: Introduce command execution helper with required device + name + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1663228 +Upstream Status: iproute2.git commit a14ceed32524c + +commit 
a14ceed32524c7f9c05572886cd63e921e4c0faf +Author: Leon Romanovsky +Date: Wed Oct 31 09:17:56 2018 +0200 + + rdma: Introduce command execution helper with required device name + + In contradiction to various show commands, the set command explicitly + requires to use device name as an argument. Provide new command + execution helper which enforces it. + + Signed-off-by: Leon Romanovsky + Reviewed-by: Steve Wise + Signed-off-by: David Ahern +--- + rdma/rdma.h | 1 + + rdma/utils.c | 10 ++++++++++ + 2 files changed, 11 insertions(+) + +diff --git a/rdma/rdma.h b/rdma/rdma.h +index c3b7530b6cc71..547bb5749a39f 100644 +--- a/rdma/rdma.h ++++ b/rdma/rdma.h +@@ -90,6 +90,7 @@ int cmd_link(struct rd *rd); + int cmd_res(struct rd *rd); + int rd_exec_cmd(struct rd *rd, const struct rd_cmd *c, const char *str); + int rd_exec_dev(struct rd *rd, int (*cb)(struct rd *rd)); ++int rd_exec_require_dev(struct rd *rd, int (*cb)(struct rd *rd)); + int rd_exec_link(struct rd *rd, int (*cb)(struct rd *rd), bool strict_port); + void rd_free(struct rd *rd); + int rd_set_arg_to_devname(struct rd *rd); +diff --git a/rdma/utils.c b/rdma/utils.c +index 4840bf226d54d..61f4aeb1bcf27 100644 +--- a/rdma/utils.c ++++ b/rdma/utils.c +@@ -577,6 +577,16 @@ out: + return ret; + } + ++int rd_exec_require_dev(struct rd *rd, int (*cb)(struct rd *rd)) ++{ ++ if (rd_no_arg(rd)) { ++ pr_err("Please provide device name.\n"); ++ return -EINVAL; ++ } ++ ++ return rd_exec_dev(rd, cb); ++} ++ + int rd_exec_cmd(struct rd *rd, const struct rd_cmd *cmds, const char *str) + { + const struct rd_cmd *c; +-- +2.20.1 + diff --git a/SOURCES/0053-rdma-Add-an-option-to-rename-IB-device-interface.patch b/SOURCES/0053-rdma-Add-an-option-to-rename-IB-device-interface.patch new file mode 100644 index 0000000..ab3a09e --- /dev/null +++ b/SOURCES/0053-rdma-Add-an-option-to-rename-IB-device-interface.patch @@ -0,0 +1,92 @@ +From 452611d090e456cf7b49bfbb2522df2928452e10 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 10 
Jun 2019 15:32:55 +0200 +Subject: [PATCH] rdma: Add an option to rename IB device interface + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1663228 +Upstream Status: iproute2.git commit 4443c9c6a01ea + +commit 4443c9c6a01eac8c8f2743d4d185ceb9be4d1207 +Author: Leon Romanovsky +Date: Wed Oct 31 09:17:57 2018 +0200 + + rdma: Add an option to rename IB device interface + + Enrich rdmatool with an option to rename IB devices, + the command interface follows Iproute2 convention: + "rdma dev set [OLD-DEVNAME] name NEW-DEVNAME" + + Signed-off-by: Leon Romanovsky + Reviewed-by: Steve Wise + Signed-off-by: David Ahern +--- + rdma/dev.c | 35 +++++++++++++++++++++++++++++++++++ + 1 file changed, 35 insertions(+) + +diff --git a/rdma/dev.c b/rdma/dev.c +index e2eafe47311b0..760b7fb3bb18f 100644 +--- a/rdma/dev.c ++++ b/rdma/dev.c +@@ -14,6 +14,7 @@ + static int dev_help(struct rd *rd) + { + pr_out("Usage: %s dev show [DEV]\n", rd->filename); ++ pr_out(" %s dev set [DEV] name DEVNAME\n", rd->filename); + return 0; + } + +@@ -240,17 +241,51 @@ static int dev_one_show(struct rd *rd) + return rd_exec_cmd(rd, cmds, "parameter"); + } + ++static int dev_set_name(struct rd *rd) ++{ ++ uint32_t seq; ++ ++ if (rd_no_arg(rd)) { ++ pr_err("Please provide device new name.\n"); ++ return -EINVAL; ++ } ++ ++ rd_prepare_msg(rd, RDMA_NLDEV_CMD_SET, ++ &seq, (NLM_F_REQUEST | NLM_F_ACK)); ++ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx); ++ mnl_attr_put_strz(rd->nlh, RDMA_NLDEV_ATTR_DEV_NAME, rd_argv(rd)); ++ ++ return rd_send_msg(rd); ++} ++ ++static int dev_one_set(struct rd *rd) ++{ ++ const struct rd_cmd cmds[] = { ++ { NULL, dev_help}, ++ { "name", dev_set_name}, ++ { 0 } ++ }; ++ ++ return rd_exec_cmd(rd, cmds, "parameter"); ++} ++ + static int dev_show(struct rd *rd) + { + return rd_exec_dev(rd, dev_one_show); + } + ++static int dev_set(struct rd *rd) ++{ ++ return rd_exec_require_dev(rd, dev_one_set); ++} ++ + int cmd_dev(struct rd *rd) + { + const struct 
rd_cmd cmds[] = { + { NULL, dev_show }, + { "show", dev_show }, + { "list", dev_show }, ++ { "set", dev_set }, + { "help", dev_help }, + { 0 } + }; +-- +2.20.1 + diff --git a/SOURCES/0054-rdma-Document-IB-device-renaming-option.patch b/SOURCES/0054-rdma-Document-IB-device-renaming-option.patch new file mode 100644 index 0000000..8648fb8 --- /dev/null +++ b/SOURCES/0054-rdma-Document-IB-device-renaming-option.patch @@ -0,0 +1,77 @@ +From 1857f106b1fda133918456617efe74cd068c1a86 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Mon, 10 Jun 2019 15:32:55 +0200 +Subject: [PATCH] rdma: Document IB device renaming option + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1663228 +Upstream Status: iproute2.git commit e89feffae3626 + +commit e89feffae3626da5b5fda352ae73db132ac60a47 +Author: Leon Romanovsky +Date: Sun Nov 4 21:11:22 2018 +0200 + + rdma: Document IB device renaming option + + [leonro@server /]$ lspci |grep -i Ether + 00:08.0 Ethernet controller: Red Hat, Inc. Virtio network device + 00:09.0 Ethernet controller: Mellanox Technologies MT27700 Family [ConnectX-4] + [leonro@server /]$ sudo rdma dev + 1: mlx5_0: node_type ca fw 3.8.9999 node_guid 5254:00c0:fe12:3455 + sys_image_guid 5254:00c0:fe12:3455 + [leonro@server /]$ sudo rdma dev set mlx5_0 name hfi1_0 + [leonro@server /]$ sudo rdma dev + 1: hfi1_0: node_type ca fw 3.8.9999 node_guid 5254:00c0:fe12:3455 + sys_image_guid 5254:00c0:fe12:3455 + + Signed-off-by: Leon Romanovsky + Signed-off-by: David Ahern +--- + man/man8/rdma-dev.8 | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/man/man8/rdma-dev.8 b/man/man8/rdma-dev.8 +index b7abfe1088c2f..069f471791904 100644 +--- a/man/man8/rdma-dev.8 ++++ b/man/man8/rdma-dev.8 +@@ -1,6 +1,6 @@ + .TH RDMA\-DEV 8 "06 Jul 2017" "iproute2" "Linux" + .SH NAME +-rdmak-dev \- RDMA device configuration ++rdma-dev \- RDMA device configuration + .SH SYNOPSIS + .sp + .ad l +@@ -22,10 +22,18 @@ rdmak-dev \- RDMA device configuration + 
.B rdma dev show + .RI "[ " DEV " ]" + ++.ti -8 ++.B rdma dev set ++.RI "[ " DEV " ]" ++.BR name ++.BR NEWNAME ++ + .ti -8 + .B rdma dev help + + .SH "DESCRIPTION" ++.SS rdma dev set - rename rdma device ++ + .SS rdma dev show - display rdma device attributes + + .PP +@@ -45,6 +53,11 @@ rdma dev show mlx5_3 + Shows the state of specified RDMA device. + .RE + .PP ++rdma dev set mlx5_3 name rdma_0 ++.RS 4 ++Renames the mlx5_3 device to rdma_0. ++.RE ++.PP + + .SH SEE ALSO + .BR rdma (8), +-- +2.20.1 + diff --git a/SOURCES/0055-iplink-add-support-for-reporting-multiple-XDP-progra.patch b/SOURCES/0055-iplink-add-support-for-reporting-multiple-XDP-progra.patch new file mode 100644 index 0000000..4a49a42 --- /dev/null +++ b/SOURCES/0055-iplink-add-support-for-reporting-multiple-XDP-progra.patch @@ -0,0 +1,210 @@ +From 907e2adbcb6e02453972d5ada93de7bbaefedb2a Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Thu, 13 Jun 2019 14:37:56 +0200 +Subject: [PATCH] iplink: add support for reporting multiple XDP programs + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361 +Upstream Status: iproute2.git commit da083b5a483bf + +commit da083b5a483bfd52dfe72912f62a3bc16d775b87 +Author: Jakub Kicinski +Date: Fri Jul 13 15:54:51 2018 -0700 + + iplink: add support for reporting multiple XDP programs + + Kernel now supports attaching XDP programs in the driver + and hardware at the same time. Print that information + correctly. + + In case there are multiple programs attached kernel will + not provide IFLA_XDP_PROG_ID, so don't expect it to be + there (this also improves the printing for very old kernels + slightly, as it avoids unnecessary "prog/xdp" line). + + In short mode preserve the current outputs but don't print + IDs if there are multiple. + + 6: netdevsim0: mtu 1500 xdpoffload/id:11 qdisc [...] + + and: + + 6: netdevsim0: mtu 1500 xdpmulti qdisc [...] 
+ + ip link output will keep using prog/xdp prefix if only one program + is attached, but can also print multiple program lines: + + prog/xdp id 8 tag fc7a51d1a693a99e jited + + vs: + + prog/xdpdrv id 8 tag fc7a51d1a693a99e jited + prog/xdpoffload id 9 tag fc7a51d1a693a99e + + JSON output gains a new array called "attached" which will + contain the full list of attached programs along with their + attachment modes: + + "xdp": { + "mode": 3, + "prog": { + "id": 11, + "tag": "fc7a51d1a693a99e", + "jited": 0 + }, + "attached": [ { + "mode": 3, + "prog": { + "id": 11, + "tag": "fc7a51d1a693a99e", + "jited": 0 + } + } ] + }, + + In case there are multiple programs attached the general "xdp" + section will not contain program information: + + "xdp": { + "mode": 4, + "attached": [ { + "mode": 1, + "prog": { + "id": 10, + "tag": "fc7a51d1a693a99e", + "jited": 1 + } + },{ + "mode": 3, + "prog": { + "id": 11, + "tag": "fc7a51d1a693a99e", + "jited": 0 + } + } ] + }, + + Signed-off-by: Jakub Kicinski + Reviewed-by: Quentin Monnet + Acked-by: Daniel Borkmann + Signed-off-by: David Ahern +--- + ip/iplink_xdp.c | 73 +++++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 61 insertions(+), 12 deletions(-) + +diff --git a/ip/iplink_xdp.c b/ip/iplink_xdp.c +index dd4fd1fd3a3b1..4a490bc8fb66c 100644 +--- a/ip/iplink_xdp.c ++++ b/ip/iplink_xdp.c +@@ -91,6 +91,18 @@ int xdp_parse(int *argc, char ***argv, struct iplink_req *req, + return 0; + } + ++static void xdp_dump_json_one(struct rtattr *tb[IFLA_XDP_MAX + 1], __u32 attr, ++ __u8 mode) ++{ ++ if (!tb[attr]) ++ return; ++ ++ open_json_object(NULL); ++ print_uint(PRINT_JSON, "mode", NULL, mode); ++ bpf_dump_prog_info(NULL, rta_getattr_u32(tb[attr])); ++ close_json_object(); ++} ++ + static void xdp_dump_json(struct rtattr *tb[IFLA_XDP_MAX + 1]) + { + __u32 prog_id = 0; +@@ -104,13 +116,48 @@ static void xdp_dump_json(struct rtattr *tb[IFLA_XDP_MAX + 1]) + print_uint(PRINT_JSON, "mode", NULL, mode); + if (prog_id) + 
bpf_dump_prog_info(NULL, prog_id); ++ ++ open_json_array(PRINT_JSON, "attached"); ++ if (tb[IFLA_XDP_SKB_PROG_ID] || ++ tb[IFLA_XDP_DRV_PROG_ID] || ++ tb[IFLA_XDP_HW_PROG_ID]) { ++ xdp_dump_json_one(tb, IFLA_XDP_SKB_PROG_ID, XDP_ATTACHED_SKB); ++ xdp_dump_json_one(tb, IFLA_XDP_DRV_PROG_ID, XDP_ATTACHED_DRV); ++ xdp_dump_json_one(tb, IFLA_XDP_HW_PROG_ID, XDP_ATTACHED_HW); ++ } else if (tb[IFLA_XDP_PROG_ID]) { ++ /* Older kernel - use IFLA_XDP_PROG_ID */ ++ xdp_dump_json_one(tb, IFLA_XDP_PROG_ID, mode); ++ } ++ close_json_array(PRINT_JSON, NULL); ++ + close_json_object(); + } + ++static void xdp_dump_prog_one(FILE *fp, struct rtattr *tb[IFLA_XDP_MAX + 1], ++ __u32 attr, bool link, bool details, ++ const char *pfx) ++{ ++ __u32 prog_id; ++ ++ if (!tb[attr]) ++ return; ++ ++ prog_id = rta_getattr_u32(tb[attr]); ++ if (!details) { ++ if (prog_id && !link && attr == IFLA_XDP_PROG_ID) ++ fprintf(fp, "/id:%u", prog_id); ++ return; ++ } ++ ++ if (prog_id) { ++ fprintf(fp, "%s prog/xdp%s ", _SL_, pfx); ++ bpf_dump_prog_info(fp, prog_id); ++ } ++} ++ + void xdp_dump(FILE *fp, struct rtattr *xdp, bool link, bool details) + { + struct rtattr *tb[IFLA_XDP_MAX + 1]; +- __u32 prog_id = 0; + __u8 mode; + + parse_rtattr_nested(tb, IFLA_XDP_MAX, xdp); +@@ -124,27 +171,29 @@ void xdp_dump(FILE *fp, struct rtattr *xdp, bool link, bool details) + else if (is_json_context()) + return details ? 
(void)0 : xdp_dump_json(tb); + else if (details && link) +- fprintf(fp, "%s prog/xdp", _SL_); ++ /* don't print mode */; + else if (mode == XDP_ATTACHED_DRV) + fprintf(fp, "xdp"); + else if (mode == XDP_ATTACHED_SKB) + fprintf(fp, "xdpgeneric"); + else if (mode == XDP_ATTACHED_HW) + fprintf(fp, "xdpoffload"); ++ else if (mode == XDP_ATTACHED_MULTI) ++ fprintf(fp, "xdpmulti"); + else + fprintf(fp, "xdp[%u]", mode); + +- if (tb[IFLA_XDP_PROG_ID]) +- prog_id = rta_getattr_u32(tb[IFLA_XDP_PROG_ID]); +- if (!details) { +- if (prog_id && !link) +- fprintf(fp, "/id:%u", prog_id); +- fprintf(fp, " "); +- return; ++ xdp_dump_prog_one(fp, tb, IFLA_XDP_PROG_ID, link, details, ""); ++ ++ if (mode == XDP_ATTACHED_MULTI) { ++ xdp_dump_prog_one(fp, tb, IFLA_XDP_SKB_PROG_ID, link, details, ++ "generic"); ++ xdp_dump_prog_one(fp, tb, IFLA_XDP_DRV_PROG_ID, link, details, ++ "drv"); ++ xdp_dump_prog_one(fp, tb, IFLA_XDP_HW_PROG_ID, link, details, ++ "offload"); + } + +- if (prog_id) { ++ if (!details || !link) + fprintf(fp, " "); +- bpf_dump_prog_info(fp, prog_id); +- } + } +-- +2.20.1 + diff --git a/SOURCES/0056-bpf-move-bpf_elf_map-fixup-notification-under-verbos.patch b/SOURCES/0056-bpf-move-bpf_elf_map-fixup-notification-under-verbos.patch new file mode 100644 index 0000000..50c380c --- /dev/null +++ b/SOURCES/0056-bpf-move-bpf_elf_map-fixup-notification-under-verbos.patch @@ -0,0 +1,43 @@ +From f6fa6c4f178d2bbd3f33cfd4c32265692b91fe5d Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Thu, 13 Jun 2019 14:37:56 +0200 +Subject: [PATCH] bpf: move bpf_elf_map fixup notification under verbose + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361 +Upstream Status: iproute2.git commit 282a1fe1f8fc8 + +commit 282a1fe1f8fc87b1ebca4ca6f4440d2e69cf4b8f +Author: Daniel Borkmann +Date: Wed Jul 18 01:31:19 2018 +0200 + + bpf: move bpf_elf_map fixup notification under verbose + + No need to spam the user with this if it can be fixed gracefully + anyway. 
Therefore, move it under verbose option. + + Signed-off-by: Daniel Borkmann + Signed-off-by: David Ahern +--- + lib/bpf.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/lib/bpf.c b/lib/bpf.c +index 65e26989a1f30..9dc37c787d907 100644 +--- a/lib/bpf.c ++++ b/lib/bpf.c +@@ -1898,9 +1898,9 @@ static int bpf_fetch_maps_end(struct bpf_elf_ctx *ctx) + } + + memcpy(ctx->maps, fixup, sizeof(fixup)); +- +- printf("Note: %zu bytes struct bpf_elf_map fixup performed due to size mismatch!\n", +- sizeof(struct bpf_elf_map) - ctx->map_len); ++ if (ctx->verbose) ++ printf("%zu bytes struct bpf_elf_map fixup performed due to size mismatch!\n", ++ sizeof(struct bpf_elf_map) - ctx->map_len); + return 0; + } + +-- +2.20.1 + diff --git a/SOURCES/0057-bpf-remove-strict-dependency-on-af_alg.patch b/SOURCES/0057-bpf-remove-strict-dependency-on-af_alg.patch new file mode 100644 index 0000000..7197210 --- /dev/null +++ b/SOURCES/0057-bpf-remove-strict-dependency-on-af_alg.patch @@ -0,0 +1,181 @@ +From 19729e1302017ef33e139903b28f9a778b2a8748 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Thu, 13 Jun 2019 14:37:56 +0200 +Subject: [PATCH] bpf: remove strict dependency on af_alg + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361 +Upstream Status: iproute2.git commit 6e5094dbb7c06 + +commit 6e5094dbb7c0682a9ca6eb2a64ec51f0a8a33a22 +Author: Daniel Borkmann +Date: Wed Jul 18 01:31:20 2018 +0200 + + bpf: remove strict dependency on af_alg + + Do not bail out when AF_ALG is not supported by the kernel and + only do so when a map is requested in object ns where we're + calculating the hash. Otherwise, the loader can operate just + fine, therefore lets not fail early when it's not needed. 
+ + Signed-off-by: Daniel Borkmann + Signed-off-by: David Ahern +--- + lib/bpf.c | 74 +++++++++++++++++++++---------------------------------- + 1 file changed, 28 insertions(+), 46 deletions(-) + +diff --git a/lib/bpf.c b/lib/bpf.c +index 9dc37c787d907..ead8b5a7219f0 100644 +--- a/lib/bpf.c ++++ b/lib/bpf.c +@@ -1130,6 +1130,7 @@ struct bpf_elf_ctx { + GElf_Ehdr elf_hdr; + Elf_Data *sym_tab; + Elf_Data *str_tab; ++ char obj_uid[64]; + int obj_fd; + int map_fds[ELF_MAX_MAPS]; + struct bpf_elf_map maps[ELF_MAX_MAPS]; +@@ -1143,6 +1144,7 @@ struct bpf_elf_ctx { + enum bpf_prog_type type; + __u32 ifindex; + bool verbose; ++ bool noafalg; + struct bpf_elf_st stat; + struct bpf_hash_entry *ht[256]; + char *log; +@@ -1258,22 +1260,15 @@ static int bpf_obj_hash(const char *object, uint8_t *out, size_t len) + return -EINVAL; + + cfd = socket(AF_ALG, SOCK_SEQPACKET, 0); +- if (cfd < 0) { +- fprintf(stderr, "Cannot get AF_ALG socket: %s\n", +- strerror(errno)); ++ if (cfd < 0) + return cfd; +- } + + ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg)); +- if (ret < 0) { +- fprintf(stderr, "Error binding socket: %s\n", strerror(errno)); ++ if (ret < 0) + goto out_cfd; +- } + + ofd = accept(cfd, NULL, 0); + if (ofd < 0) { +- fprintf(stderr, "Error accepting socket: %s\n", +- strerror(errno)); + ret = ofd; + goto out_cfd; + } +@@ -1318,29 +1313,7 @@ out_cfd: + return ret; + } + +-static const char *bpf_get_obj_uid(const char *pathname) +-{ +- static bool bpf_uid_cached; +- static char bpf_uid[64]; +- uint8_t tmp[20]; +- int ret; +- +- if (bpf_uid_cached) +- goto done; +- +- ret = bpf_obj_hash(pathname, tmp, sizeof(tmp)); +- if (ret) { +- fprintf(stderr, "Object hashing failed!\n"); +- return NULL; +- } +- +- hexstring_n2a(tmp, sizeof(tmp), bpf_uid, sizeof(bpf_uid)); +- bpf_uid_cached = true; +-done: +- return bpf_uid; +-} +- +-static int bpf_init_env(const char *pathname) ++static void bpf_init_env(void) + { + struct rlimit limit = { + .rlim_cur = RLIM_INFINITY, +@@ -1350,15 
+1323,8 @@ static int bpf_init_env(const char *pathname) + /* Don't bother in case we fail! */ + setrlimit(RLIMIT_MEMLOCK, &limit); + +- if (!bpf_get_work_dir(BPF_PROG_TYPE_UNSPEC)) { ++ if (!bpf_get_work_dir(BPF_PROG_TYPE_UNSPEC)) + fprintf(stderr, "Continuing without mounted eBPF fs. Too old kernel?\n"); +- return 0; +- } +- +- if (!bpf_get_obj_uid(pathname)) +- return -1; +- +- return 0; + } + + static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx, +@@ -1394,7 +1360,7 @@ static void bpf_make_pathname(char *pathname, size_t len, const char *name, + case PIN_OBJECT_NS: + snprintf(pathname, len, "%s/%s/%s", + bpf_get_work_dir(ctx->type), +- bpf_get_obj_uid(NULL), name); ++ ctx->obj_uid, name); + break; + case PIN_GLOBAL_NS: + snprintf(pathname, len, "%s/%s/%s", +@@ -1427,7 +1393,7 @@ static int bpf_make_obj_path(const struct bpf_elf_ctx *ctx) + int ret; + + snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_work_dir(ctx->type), +- bpf_get_obj_uid(NULL)); ++ ctx->obj_uid); + + ret = mkdir(tmp, S_IRWXU); + if (ret && errno != EEXIST) { +@@ -1696,6 +1662,12 @@ static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx) + const char *map_name; + + for (i = 0; i < ctx->map_num; i++) { ++ if (ctx->maps[i].pinning == PIN_OBJECT_NS && ++ ctx->noafalg) { ++ fprintf(stderr, "Missing kernel AF_ALG support for PIN_OBJECT_NS!\n"); ++ return -ENOTSUP; ++ } ++ + map_name = bpf_map_fetch_name(ctx, i); + if (!map_name) + return -EIO; +@@ -2451,14 +2423,24 @@ static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname, + enum bpf_prog_type type, __u32 ifindex, + bool verbose) + { +- int ret = -EINVAL; ++ uint8_t tmp[20]; ++ int ret; + +- if (elf_version(EV_CURRENT) == EV_NONE || +- bpf_init_env(pathname)) +- return ret; ++ if (elf_version(EV_CURRENT) == EV_NONE) ++ return -EINVAL; ++ ++ bpf_init_env(); + + memset(ctx, 0, sizeof(*ctx)); + bpf_get_cfg(ctx); ++ ++ ret = bpf_obj_hash(pathname, tmp, sizeof(tmp)); ++ if (ret) ++ ctx->noafalg = true; ++ else ++ 
hexstring_n2a(tmp, sizeof(tmp), ctx->obj_uid, ++ sizeof(ctx->obj_uid)); ++ + ctx->verbose = verbose; + ctx->type = type; + ctx->ifindex = ifindex; +-- +2.20.1 + diff --git a/SOURCES/0058-bpf-implement-bpf-to-bpf-calls-support.patch b/SOURCES/0058-bpf-implement-bpf-to-bpf-calls-support.patch new file mode 100644 index 0000000..18cc137 --- /dev/null +++ b/SOURCES/0058-bpf-implement-bpf-to-bpf-calls-support.patch @@ -0,0 +1,460 @@ +From 80dcb40f8442f79a043c520ae9eef067519ee7ca Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Thu, 13 Jun 2019 14:37:56 +0200 +Subject: [PATCH] bpf: implement bpf to bpf calls support + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361 +Upstream Status: iproute2.git commit b5cb33aec65cb + +commit b5cb33aec65cb77183abbdfa5b61ecc9877ec776 +Author: Daniel Borkmann +Date: Wed Jul 18 01:31:21 2018 +0200 + + bpf: implement bpf to bpf calls support + + Implement missing bpf to bpf calls support. The loader will + recognize .text section and handle relocation entries that + are emitted by LLVM. + + First step is processing of map related relocation entries + for .text section, and in a second step loader will copy .text + section into program section and adjust call instruction + offset accordingly. + + Example with test_xdp_noinline.o from kernel selftests: + + 1) Every function as __attribute__ ((always_inline)), rest + left unchanged: + + # ip -force link set dev lo xdp obj test_xdp_noinline.o sec xdp-test + # ip a + 1: lo: mtu 65536 xdpgeneric/id:233 qdisc noqueue state UNKNOWN group default qlen 1000 + link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 + inet 127.0.0.1/8 scope host lo + valid_lft forever preferred_lft forever + inet6 ::1/128 scope host + valid_lft forever preferred_lft forever + [...] + # bpftool prog dump xlated id 233 + [...] 
+ 1669: (2d) if r3 > r2 goto pc+4 + 1670: (79) r2 = *(u64 *)(r10 -136) + 1671: (61) r2 = *(u32 *)(r2 +0) + 1672: (63) *(u32 *)(r1 +0) = r2 + 1673: (b7) r0 = 1 + 1674: (95) exit <-- 1674 insns total + + 2) Every function as __attribute__ ((noinline)), rest + left unchanged: + + # ip -force link set dev lo xdp obj test_xdp_noinline.o sec xdp-test + # ip a + 1: lo: mtu 65536 xdpgeneric/id:236 qdisc noqueue state UNKNOWN group default qlen 1000 + link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 + inet 127.0.0.1/8 scope host lo + valid_lft forever preferred_lft forever + inet6 ::1/128 scope host + valid_lft forever preferred_lft forever + [...] + # bpftool prog dump xlated id 236 + [...] + 1000: (bf) r1 = r6 + 1001: (b7) r2 = 24 + 1002: (85) call pc+3 <-- pc-relative call insns + 1003: (1f) r7 -= r0 + 1004: (bf) r0 = r7 + 1005: (95) exit + 1006: (bf) r0 = r1 + 1007: (bf) r1 = r2 + 1008: (67) r1 <<= 32 + 1009: (77) r1 >>= 32 + 1010: (bf) r3 = r0 + 1011: (6f) r3 <<= r1 + 1012: (87) r2 = -r2 + 1013: (57) r2 &= 31 + 1014: (67) r0 <<= 32 + 1015: (77) r0 >>= 32 + 1016: (7f) r0 >>= r2 + 1017: (4f) r0 |= r3 + 1018: (95) exit <-- 1018 insns total + + Signed-off-by: Daniel Borkmann + Signed-off-by: David Ahern +--- + lib/bpf.c | 233 ++++++++++++++++++++++++++++++++++++------------------ + 1 file changed, 157 insertions(+), 76 deletions(-) + +diff --git a/lib/bpf.c b/lib/bpf.c +index ead8b5a7219f0..1b87490555050 100644 +--- a/lib/bpf.c ++++ b/lib/bpf.c +@@ -1109,7 +1109,8 @@ int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns, + #ifdef HAVE_ELF + struct bpf_elf_prog { + enum bpf_prog_type type; +- const struct bpf_insn *insns; ++ struct bpf_insn *insns; ++ unsigned int insns_num; + size_t size; + const char *license; + }; +@@ -1135,11 +1136,13 @@ struct bpf_elf_ctx { + int map_fds[ELF_MAX_MAPS]; + struct bpf_elf_map maps[ELF_MAX_MAPS]; + struct bpf_map_ext maps_ext[ELF_MAX_MAPS]; ++ struct bpf_elf_prog prog_text; + int sym_num; + int map_num; + int 
map_len; + bool *sec_done; + int sec_maps; ++ int sec_text; + char license[ELF_MAX_LICENSE_LEN]; + enum bpf_prog_type type; + __u32 ifindex; +@@ -1904,12 +1907,25 @@ static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section, + return 0; + } + ++static int bpf_fetch_text(struct bpf_elf_ctx *ctx, int section, ++ struct bpf_elf_sec_data *data) ++{ ++ ctx->sec_text = section; ++ ctx->sec_done[section] = true; ++ return 0; ++} ++ + static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx) + { + return ctx->sym_tab && ctx->str_tab && ctx->sec_maps; + } + +-static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx) ++static bool bpf_has_call_data(const struct bpf_elf_ctx *ctx) ++{ ++ return ctx->sec_text; ++} ++ ++static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx, bool check_text_sec) + { + struct bpf_elf_sec_data data; + int i, ret = -1; +@@ -1925,6 +1941,11 @@ static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx) + else if (data.sec_hdr.sh_type == SHT_PROGBITS && + !strcmp(data.sec_name, ELF_SECTION_LICENSE)) + ret = bpf_fetch_license(ctx, i, &data); ++ else if (data.sec_hdr.sh_type == SHT_PROGBITS && ++ (data.sec_hdr.sh_flags & SHF_EXECINSTR) && ++ !strcmp(data.sec_name, ".text") && ++ check_text_sec) ++ ret = bpf_fetch_text(ctx, i, &data); + else if (data.sec_hdr.sh_type == SHT_SYMTAB && + !strcmp(data.sec_name, ".symtab")) + ret = bpf_fetch_symtab(ctx, i, &data); +@@ -1969,17 +1990,18 @@ static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section, + ret = bpf_fill_section_data(ctx, i, &data); + if (ret < 0 || + !(data.sec_hdr.sh_type == SHT_PROGBITS && +- data.sec_hdr.sh_flags & SHF_EXECINSTR && ++ (data.sec_hdr.sh_flags & SHF_EXECINSTR) && + !strcmp(data.sec_name, section))) + continue; + + *sseen = true; + + memset(&prog, 0, sizeof(prog)); +- prog.type = ctx->type; +- prog.insns = data.sec_data->d_buf; +- prog.size = data.sec_data->d_size; +- prog.license = ctx->license; ++ prog.type = ctx->type; ++ prog.license = ctx->license; ++ prog.size 
= data.sec_data->d_size; ++ prog.insns_num = prog.size / sizeof(struct bpf_insn); ++ prog.insns = data.sec_data->d_buf; + + fd = bpf_prog_attach(section, &prog, ctx); + if (fd < 0) +@@ -1992,84 +2014,120 @@ static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section, + return fd; + } + +-struct bpf_tail_call_props { +- unsigned int total; +- unsigned int jited; ++struct bpf_relo_props { ++ struct bpf_tail_call { ++ unsigned int total; ++ unsigned int jited; ++ } tc; ++ int main_num; + }; + ++static int bpf_apply_relo_map(struct bpf_elf_ctx *ctx, struct bpf_elf_prog *prog, ++ GElf_Rel *relo, GElf_Sym *sym, ++ struct bpf_relo_props *props) ++{ ++ unsigned int insn_off = relo->r_offset / sizeof(struct bpf_insn); ++ unsigned int map_idx = sym->st_value / ctx->map_len; ++ ++ if (insn_off >= prog->insns_num) ++ return -EINVAL; ++ if (prog->insns[insn_off].code != (BPF_LD | BPF_IMM | BPF_DW)) { ++ fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n", ++ insn_off); ++ return -EINVAL; ++ } ++ ++ if (map_idx >= ARRAY_SIZE(ctx->map_fds)) ++ return -EINVAL; ++ if (!ctx->map_fds[map_idx]) ++ return -EINVAL; ++ if (ctx->maps[map_idx].type == BPF_MAP_TYPE_PROG_ARRAY) { ++ props->tc.total++; ++ if (ctx->maps_ext[map_idx].owner.jited || ++ (ctx->maps_ext[map_idx].owner.type == 0 && ++ ctx->cfg.jit_enabled)) ++ props->tc.jited++; ++ } ++ ++ prog->insns[insn_off].src_reg = BPF_PSEUDO_MAP_FD; ++ prog->insns[insn_off].imm = ctx->map_fds[map_idx]; ++ return 0; ++} ++ ++static int bpf_apply_relo_call(struct bpf_elf_ctx *ctx, struct bpf_elf_prog *prog, ++ GElf_Rel *relo, GElf_Sym *sym, ++ struct bpf_relo_props *props) ++{ ++ unsigned int insn_off = relo->r_offset / sizeof(struct bpf_insn); ++ struct bpf_elf_prog *prog_text = &ctx->prog_text; ++ ++ if (insn_off >= prog->insns_num) ++ return -EINVAL; ++ if (prog->insns[insn_off].code != (BPF_JMP | BPF_CALL) && ++ prog->insns[insn_off].src_reg != BPF_PSEUDO_CALL) { ++ fprintf(stderr, 
"ELF contains relo data for non call instruction at offset %u! Compiler bug?!\n", ++ insn_off); ++ return -EINVAL; ++ } ++ ++ if (!props->main_num) { ++ struct bpf_insn *insns = realloc(prog->insns, ++ prog->size + prog_text->size); ++ if (!insns) ++ return -ENOMEM; ++ ++ memcpy(insns + prog->insns_num, prog_text->insns, ++ prog_text->size); ++ props->main_num = prog->insns_num; ++ prog->insns = insns; ++ prog->insns_num += prog_text->insns_num; ++ prog->size += prog_text->size; ++ } ++ ++ prog->insns[insn_off].imm += props->main_num - insn_off; ++ return 0; ++} ++ + static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx, + struct bpf_elf_sec_data *data_relo, +- struct bpf_elf_sec_data *data_insn, +- struct bpf_tail_call_props *props) ++ struct bpf_elf_prog *prog, ++ struct bpf_relo_props *props) + { +- Elf_Data *idata = data_insn->sec_data; + GElf_Shdr *rhdr = &data_relo->sec_hdr; + int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize; +- struct bpf_insn *insns = idata->d_buf; +- unsigned int num_insns = idata->d_size / sizeof(*insns); + + for (relo_ent = 0; relo_ent < relo_num; relo_ent++) { +- unsigned int ioff, rmap; + GElf_Rel relo; + GElf_Sym sym; ++ int ret = -EIO; + + if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo) + return -EIO; +- +- ioff = relo.r_offset / sizeof(struct bpf_insn); +- if (ioff >= num_insns || +- insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) { +- fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n", +- ioff); +- fprintf(stderr, " - Current section: %s\n", data_relo->sec_name); +- if (ioff < num_insns && +- insns[ioff].code == (BPF_JMP | BPF_CALL)) +- fprintf(stderr, " - Try to annotate functions with always_inline attribute!\n"); +- return -EINVAL; +- } +- + if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym) + return -EIO; +- if (sym.st_shndx != ctx->sec_maps) { +- fprintf(stderr, "ELF contains non-map related relo data in entry %u pointing to section %u! 
Compiler bug?!\n", +- relo_ent, sym.st_shndx); +- return -EIO; +- } + +- rmap = sym.st_value / ctx->map_len; +- if (rmap >= ARRAY_SIZE(ctx->map_fds)) +- return -EINVAL; +- if (!ctx->map_fds[rmap]) +- return -EINVAL; +- if (ctx->maps[rmap].type == BPF_MAP_TYPE_PROG_ARRAY) { +- props->total++; +- if (ctx->maps_ext[rmap].owner.jited || +- (ctx->maps_ext[rmap].owner.type == 0 && +- ctx->cfg.jit_enabled)) +- props->jited++; +- } +- +- if (ctx->verbose) +- fprintf(stderr, "Map \'%s\' (%d) injected into prog section \'%s\' at offset %u!\n", +- bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap], +- data_insn->sec_name, ioff); +- +- insns[ioff].src_reg = BPF_PSEUDO_MAP_FD; +- insns[ioff].imm = ctx->map_fds[rmap]; ++ if (sym.st_shndx == ctx->sec_maps) ++ ret = bpf_apply_relo_map(ctx, prog, &relo, &sym, props); ++ else if (sym.st_shndx == ctx->sec_text) ++ ret = bpf_apply_relo_call(ctx, prog, &relo, &sym, props); ++ else ++ fprintf(stderr, "ELF contains non-{map,call} related relo data in entry %u pointing to section %u! 
Compiler bug?!\n", ++ relo_ent, sym.st_shndx); ++ if (ret < 0) ++ return ret; + } + + return 0; + } + + static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section, +- bool *lderr, bool *sseen) ++ bool *lderr, bool *sseen, struct bpf_elf_prog *prog) + { + struct bpf_elf_sec_data data_relo, data_insn; +- struct bpf_elf_prog prog; + int ret, idx, i, fd = -1; + + for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { +- struct bpf_tail_call_props props = {}; ++ struct bpf_relo_props props = {}; + + ret = bpf_fill_section_data(ctx, i, &data_relo); + if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL) +@@ -2080,40 +2138,54 @@ static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section, + ret = bpf_fill_section_data(ctx, idx, &data_insn); + if (ret < 0 || + !(data_insn.sec_hdr.sh_type == SHT_PROGBITS && +- data_insn.sec_hdr.sh_flags & SHF_EXECINSTR && ++ (data_insn.sec_hdr.sh_flags & SHF_EXECINSTR) && + !strcmp(data_insn.sec_name, section))) + continue; ++ if (sseen) ++ *sseen = true; ++ ++ memset(prog, 0, sizeof(*prog)); ++ prog->type = ctx->type; ++ prog->license = ctx->license; ++ prog->size = data_insn.sec_data->d_size; ++ prog->insns_num = prog->size / sizeof(struct bpf_insn); ++ prog->insns = malloc(prog->size); ++ if (!prog->insns) { ++ *lderr = true; ++ return -ENOMEM; ++ } + +- *sseen = true; ++ memcpy(prog->insns, data_insn.sec_data->d_buf, prog->size); + +- ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn, &props); ++ ret = bpf_apply_relo_data(ctx, &data_relo, prog, &props); + if (ret < 0) { + *lderr = true; ++ if (ctx->sec_text != idx) ++ free(prog->insns); + return ret; + } ++ if (ctx->sec_text == idx) { ++ fd = 0; ++ goto out; ++ } + +- memset(&prog, 0, sizeof(prog)); +- prog.type = ctx->type; +- prog.insns = data_insn.sec_data->d_buf; +- prog.size = data_insn.sec_data->d_size; +- prog.license = ctx->license; +- +- fd = bpf_prog_attach(section, &prog, ctx); ++ fd = bpf_prog_attach(section, prog, ctx); ++ free(prog->insns); + if 
(fd < 0) { + *lderr = true; +- if (props.total) { ++ if (props.tc.total) { + if (ctx->cfg.jit_enabled && +- props.total != props.jited) ++ props.tc.total != props.tc.jited) + fprintf(stderr, "JIT enabled, but only %u/%u tail call maps in the program have JITed owner!\n", +- props.jited, props.total); ++ props.tc.jited, props.tc.total); + if (!ctx->cfg.jit_enabled && +- props.jited) ++ props.tc.jited) + fprintf(stderr, "JIT disabled, but %u/%u tail call maps in the program have JITed owner!\n", +- props.jited, props.total); ++ props.tc.jited, props.tc.total); + } + return fd; + } +- ++out: + ctx->sec_done[i] = true; + ctx->sec_done[idx] = true; + break; +@@ -2125,10 +2197,18 @@ static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section, + static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section) + { + bool lderr = false, sseen = false; ++ struct bpf_elf_prog prog; + int ret = -1; + +- if (bpf_has_map_data(ctx)) +- ret = bpf_fetch_prog_relo(ctx, section, &lderr, &sseen); ++ if (bpf_has_call_data(ctx)) { ++ ret = bpf_fetch_prog_relo(ctx, ".text", &lderr, NULL, ++ &ctx->prog_text); ++ if (ret < 0) ++ return ret; ++ } ++ ++ if (bpf_has_map_data(ctx) || bpf_has_call_data(ctx)) ++ ret = bpf_fetch_prog_relo(ctx, section, &lderr, &sseen, &prog); + if (ret < 0 && !lderr) + ret = bpf_fetch_prog(ctx, section, &sseen); + if (ret < 0 && !sseen) +@@ -2525,6 +2605,7 @@ static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure) + + bpf_hash_destroy(ctx); + ++ free(ctx->prog_text.insns); + free(ctx->sec_done); + free(ctx->log); + +@@ -2546,7 +2627,7 @@ static int bpf_obj_open(const char *pathname, enum bpf_prog_type type, + return ret; + } + +- ret = bpf_fetch_ancillary(ctx); ++ ret = bpf_fetch_ancillary(ctx, strcmp(section, ".text")); + if (ret < 0) { + fprintf(stderr, "Error fetching ELF ancillary data!\n"); + goto out; +-- +2.20.1 + diff --git a/SOURCES/0059-bpf-implement-btf-handling-and-map-annotation.patch 
b/SOURCES/0059-bpf-implement-btf-handling-and-map-annotation.patch new file mode 100644 index 0000000..d03774e --- /dev/null +++ b/SOURCES/0059-bpf-implement-btf-handling-and-map-annotation.patch @@ -0,0 +1,624 @@ +From e8386c4e1fa3b5486487fa4d6c350a0d5e300aaf Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Thu, 13 Jun 2019 14:37:56 +0200 +Subject: [PATCH] bpf: implement btf handling and map annotation + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361 +Upstream Status: iproute2.git commit f823f36012fb5 + +commit f823f36012fb5ab4ddfca6ed4ff56188730f281e +Author: Daniel Borkmann +Date: Wed Jul 18 01:31:22 2018 +0200 + + bpf: implement btf handling and map annotation + + Implement loading of .BTF section from object file and build up + internal table for retrieving key/value id related to maps in + the BPF program. Latter is done by setting up struct btf_type + table. + + One of the issues is that there's a disconnect between the data + types used in the map and struct bpf_elf_map, meaning the underlying + types are unknown from the map description. One way to overcome + this is to add a annotation such that the loader will recognize + the relation to both. BPF_ANNOTATE_KV_PAIR(map_foo, struct key, + struct val); has been added to the API that programs can use. + + The loader will then pick the corresponding key/value type ids and + attach it to the maps for creation. This can later on be dumped via + bpftool for introspection. + + Example with test_xdp_noinline.o from kernel selftests: + + [...] + + struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; + }; + + struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct ctl_value), + .max_entries = 16, + .map_flags = 0, + }; + BPF_ANNOTATE_KV_PAIR(ctl_array, __u32, struct ctl_value); + + [...] + + Above could also further be wrapped in a macro. 
Compiling through LLVM and + converting to BTF: + + # llc --version + LLVM (http://llvm.org/): + LLVM version 7.0.0svn + Optimized build. + Default target: x86_64-unknown-linux-gnu + Host CPU: skylake + + Registered Targets: + bpf - BPF (host endian) + bpfeb - BPF (big endian) + bpfel - BPF (little endian) + [...] + + # clang [...] -O2 -target bpf -g -emit-llvm -c test_xdp_noinline.c -o - | + llc -march=bpf -mcpu=probe -mattr=dwarfris -filetype=obj -o test_xdp_noinline.o + # pahole -J test_xdp_noinline.o + + Checking pahole dump of BPF object file: + + # file test_xdp_noinline.o + test_xdp_noinline.o: ELF 64-bit LSB relocatable, *unknown arch 0xf7* version 1 (SYSV), with debug_info, not stripped + # pahole test_xdp_noinline.o + [...] + struct ctl_value { + union { + __u64 value; /* 0 8 */ + __u32 ifindex; /* 0 4 */ + __u8 mac[0]; /* 0 0 */ + }; /* 0 8 */ + + /* size: 8, cachelines: 1, members: 1 */ + /* last cacheline: 8 bytes */ + }; + + Now loading into kernel and dumping the map via bpftool: + + # ip -force link set dev lo xdp obj test_xdp_noinline.o sec xdp-test + # ip a + 1: lo: mtu 65536 xdpgeneric/id:227 qdisc noqueue state UNKNOWN group default qlen 1000 + link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 + inet 127.0.0.1/8 scope host lo + valid_lft forever preferred_lft forever + inet6 ::1/128 scope host + valid_lft forever preferred_lft forever + [...] + # bpftool prog show id 227 + 227: xdp tag a85e060c275c5616 gpl + loaded_at 2018-07-17T14:41:29+0000 uid 0 + xlated 8152B not jited memlock 12288B map_ids 381,385,386,382,384,383 + # bpftool map dump id 386 + [{ + "key": 0, + "value": { + "": { + "value": 0, + "ifindex": 0, + "mac": [] + } + } + },{ + "key": 1, + "value": { + "": { + "value": 0, + "ifindex": 0, + "mac": [] + } + } + },{ + [...] 
+ + Signed-off-by: Daniel Borkmann + Signed-off-by: David Ahern +--- + include/bpf_elf.h | 9 ++ + include/bpf_util.h | 1 + + lib/bpf.c | 332 ++++++++++++++++++++++++++++++++++++++++++++- + 3 files changed, 338 insertions(+), 4 deletions(-) + +diff --git a/include/bpf_elf.h b/include/bpf_elf.h +index a8e360f3bbb28..84e8ae00834c8 100644 +--- a/include/bpf_elf.h ++++ b/include/bpf_elf.h +@@ -41,4 +41,13 @@ struct bpf_elf_map { + __u32 inner_idx; + }; + ++#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \ ++ struct ____btf_map_##name { \ ++ type_key key; \ ++ type_val value; \ ++ }; \ ++ struct ____btf_map_##name \ ++ __attribute__ ((section(".maps." #name), used)) \ ++ ____btf_map_##name = { } ++ + #endif /* __BPF_ELF__ */ +diff --git a/include/bpf_util.h b/include/bpf_util.h +index 219beb40cd253..63837a04e56fe 100644 +--- a/include/bpf_util.h ++++ b/include/bpf_util.h +@@ -14,6 +14,7 @@ + #define __BPF_UTIL__ + + #include ++#include + #include + #include + #include +diff --git a/lib/bpf.c b/lib/bpf.c +index 1b87490555050..d093d0bd86eae 100644 +--- a/lib/bpf.c ++++ b/lib/bpf.c +@@ -393,6 +393,8 @@ struct bpf_prog_data { + + struct bpf_map_ext { + struct bpf_prog_data owner; ++ unsigned int btf_id_key; ++ unsigned int btf_id_val; + }; + + static int bpf_derive_elf_map_from_fdinfo(int fd, struct bpf_elf_map *map, +@@ -1125,24 +1127,36 @@ struct bpf_config { + unsigned int jit_enabled; + }; + ++struct bpf_btf { ++ const struct btf_header *hdr; ++ const void *raw; ++ const char *strings; ++ const struct btf_type **types; ++ int types_num; ++}; ++ + struct bpf_elf_ctx { + struct bpf_config cfg; + Elf *elf_fd; + GElf_Ehdr elf_hdr; + Elf_Data *sym_tab; + Elf_Data *str_tab; ++ Elf_Data *btf_data; + char obj_uid[64]; + int obj_fd; ++ int btf_fd; + int map_fds[ELF_MAX_MAPS]; + struct bpf_elf_map maps[ELF_MAX_MAPS]; + struct bpf_map_ext maps_ext[ELF_MAX_MAPS]; + struct bpf_elf_prog prog_text; ++ struct bpf_btf btf; + int sym_num; + int map_num; + int map_len; + bool 
*sec_done; + int sec_maps; + int sec_text; ++ int sec_btf; + char license[ELF_MAX_LICENSE_LEN]; + enum bpf_prog_type type; + __u32 ifindex; +@@ -1167,6 +1181,11 @@ struct bpf_map_data { + struct bpf_elf_map *ent; + }; + ++static bool bpf_log_has_data(struct bpf_elf_ctx *ctx) ++{ ++ return ctx->log && ctx->log[0]; ++} ++ + static __check_format_string(2, 3) void + bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...) + { +@@ -1176,7 +1195,7 @@ bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...) + vfprintf(stderr, format, vl); + va_end(vl); + +- if (ctx->log && ctx->log[0]) { ++ if (bpf_log_has_data(ctx)) { + if (ctx->verbose) { + fprintf(stderr, "%s\n", ctx->log); + } else { +@@ -1223,7 +1242,9 @@ static int bpf_log_realloc(struct bpf_elf_ctx *ctx) + + static int bpf_map_create(enum bpf_map_type type, uint32_t size_key, + uint32_t size_value, uint32_t max_elem, +- uint32_t flags, int inner_fd, uint32_t ifindex) ++ uint32_t flags, int inner_fd, int btf_fd, ++ uint32_t ifindex, uint32_t btf_id_key, ++ uint32_t btf_id_val) + { + union bpf_attr attr = {}; + +@@ -1234,10 +1255,30 @@ static int bpf_map_create(enum bpf_map_type type, uint32_t size_key, + attr.map_flags = flags; + attr.inner_map_fd = inner_fd; + attr.map_ifindex = ifindex; ++ attr.btf_fd = btf_fd; ++ attr.btf_key_type_id = btf_id_key; ++ attr.btf_value_type_id = btf_id_val; + + return bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + } + ++static int bpf_btf_load(void *btf, size_t size_btf, ++ char *log, size_t size_log) ++{ ++ union bpf_attr attr = {}; ++ ++ attr.btf = bpf_ptr_to_u64(btf); ++ attr.btf_size = size_btf; ++ ++ if (size_log > 0) { ++ attr.btf_log_buf = bpf_ptr_to_u64(log); ++ attr.btf_log_size = size_log; ++ attr.btf_log_level = 1; ++ } ++ ++ return bpf(BPF_BTF_LOAD, &attr, sizeof(attr)); ++} ++ + static int bpf_obj_pin(int fd, const char *pathname) + { + union bpf_attr attr = {}; +@@ -1613,7 +1654,8 @@ static int bpf_map_attach(const char *name, struct bpf_elf_ctx *ctx, + 
ifindex = bpf_map_offload_neutral(map->type) ? 0 : ctx->ifindex; + errno = 0; + fd = bpf_map_create(map->type, map->size_key, map->size_value, +- map->max_elem, map->flags, map_inner_fd, ifindex); ++ map->max_elem, map->flags, map_inner_fd, ctx->btf_fd, ++ ifindex, ext->btf_id_key, ext->btf_id_val); + + if (fd < 0 || ctx->verbose) { + bpf_map_report(fd, name, map, ctx, map_inner_fd); +@@ -1638,8 +1680,80 @@ static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx, + return ctx->str_tab->d_buf + sym->st_name; + } + ++static int bpf_btf_find(struct bpf_elf_ctx *ctx, const char *name) ++{ ++ const struct btf_type *type; ++ const char *res; ++ int id; ++ ++ for (id = 1; id < ctx->btf.types_num; id++) { ++ type = ctx->btf.types[id]; ++ if (type->name_off >= ctx->btf.hdr->str_len) ++ continue; ++ res = &ctx->btf.strings[type->name_off]; ++ if (!strcmp(res, name)) ++ return id; ++ } ++ ++ return -ENOENT; ++} ++ ++static int bpf_btf_find_kv(struct bpf_elf_ctx *ctx, const struct bpf_elf_map *map, ++ const char *name, uint32_t *id_key, uint32_t *id_val) ++{ ++ const struct btf_member *key, *val; ++ const struct btf_type *type; ++ char btf_name[512]; ++ const char *res; ++ int id; ++ ++ snprintf(btf_name, sizeof(btf_name), "____btf_map_%s", name); ++ id = bpf_btf_find(ctx, btf_name); ++ if (id < 0) ++ return id; ++ ++ type = ctx->btf.types[id]; ++ if (BTF_INFO_KIND(type->info) != BTF_KIND_STRUCT) ++ return -EINVAL; ++ if (BTF_INFO_VLEN(type->info) != 2) ++ return -EINVAL; ++ ++ key = ((void *) type) + sizeof(*type); ++ val = key + 1; ++ if (!key->type || key->type >= ctx->btf.types_num || ++ !val->type || val->type >= ctx->btf.types_num) ++ return -EINVAL; ++ ++ if (key->name_off >= ctx->btf.hdr->str_len || ++ val->name_off >= ctx->btf.hdr->str_len) ++ return -EINVAL; ++ ++ res = &ctx->btf.strings[key->name_off]; ++ if (strcmp(res, "key")) ++ return -EINVAL; ++ ++ res = &ctx->btf.strings[val->name_off]; ++ if (strcmp(res, "value")) ++ return -EINVAL; ++ ++ *id_key = 
key->type; ++ *id_val = val->type; ++ return 0; ++} ++ ++static void bpf_btf_annotate(struct bpf_elf_ctx *ctx, int which, const char *name) ++{ ++ uint32_t id_key = 0, id_val = 0; ++ ++ if (!bpf_btf_find_kv(ctx, &ctx->maps[which], name, &id_key, &id_val)) { ++ ctx->maps_ext[which].btf_id_key = id_key; ++ ctx->maps_ext[which].btf_id_val = id_val; ++ } ++} ++ + static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which) + { ++ const char *name; + GElf_Sym sym; + int i; + +@@ -1653,7 +1767,9 @@ static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which) + sym.st_value / ctx->map_len != which) + continue; + +- return bpf_str_tab_name(ctx, &sym); ++ name = bpf_str_tab_name(ctx, &sym); ++ bpf_btf_annotate(ctx, which, name); ++ return name; + } + + return NULL; +@@ -1915,11 +2031,210 @@ static int bpf_fetch_text(struct bpf_elf_ctx *ctx, int section, + return 0; + } + ++static void bpf_btf_report(int fd, struct bpf_elf_ctx *ctx) ++{ ++ fprintf(stderr, "\nBTF debug data section \'.BTF\' %s%s (%d)!\n", ++ fd < 0 ? "rejected: " : "loaded", ++ fd < 0 ? strerror(errno) : "", ++ fd < 0 ? 
errno : fd); ++ ++ fprintf(stderr, " - Length: %zu\n", ctx->btf_data->d_size); ++ ++ bpf_dump_error(ctx, "Verifier analysis:\n\n"); ++} ++ ++static int bpf_btf_attach(struct bpf_elf_ctx *ctx) ++{ ++ int tries = 0, fd; ++retry: ++ errno = 0; ++ fd = bpf_btf_load(ctx->btf_data->d_buf, ctx->btf_data->d_size, ++ ctx->log, ctx->log_size); ++ if (fd < 0 || ctx->verbose) { ++ if (fd < 0 && (errno == ENOSPC || !ctx->log_size)) { ++ if (tries++ < 10 && !bpf_log_realloc(ctx)) ++ goto retry; ++ ++ fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n", ++ ctx->log_size, tries); ++ return fd; ++ } ++ ++ if (bpf_log_has_data(ctx)) ++ bpf_btf_report(fd, ctx); ++ } ++ ++ return fd; ++} ++ ++static int bpf_fetch_btf_begin(struct bpf_elf_ctx *ctx, int section, ++ struct bpf_elf_sec_data *data) ++{ ++ ctx->btf_data = data->sec_data; ++ ctx->sec_btf = section; ++ ctx->sec_done[section] = true; ++ return 0; ++} ++ ++static int bpf_btf_check_header(struct bpf_elf_ctx *ctx) ++{ ++ const struct btf_header *hdr = ctx->btf_data->d_buf; ++ const char *str_start, *str_end; ++ unsigned int data_len; ++ ++ if (hdr->magic != BTF_MAGIC) { ++ fprintf(stderr, "Object has wrong BTF magic: %x, expected: %x!\n", ++ hdr->magic, BTF_MAGIC); ++ return -EINVAL; ++ } ++ ++ if (hdr->version != BTF_VERSION) { ++ fprintf(stderr, "Object has wrong BTF version: %u, expected: %u!\n", ++ hdr->version, BTF_VERSION); ++ return -EINVAL; ++ } ++ ++ if (hdr->flags) { ++ fprintf(stderr, "Object has unsupported BTF flags %x!\n", ++ hdr->flags); ++ return -EINVAL; ++ } ++ ++ data_len = ctx->btf_data->d_size - sizeof(*hdr); ++ if (data_len < hdr->type_off || ++ data_len < hdr->str_off || ++ data_len < hdr->type_len + hdr->str_len || ++ hdr->type_off >= hdr->str_off || ++ hdr->type_off + hdr->type_len != hdr->str_off || ++ hdr->str_off + hdr->str_len != data_len || ++ (hdr->type_off & (sizeof(uint32_t) - 1))) { ++ fprintf(stderr, "Object has malformed BTF data!\n"); ++ return -EINVAL; ++ } ++ 
++ ctx->btf.hdr = hdr; ++ ctx->btf.raw = hdr + 1; ++ ++ str_start = ctx->btf.raw + hdr->str_off; ++ str_end = str_start + hdr->str_len; ++ if (!hdr->str_len || ++ hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || ++ str_start[0] || str_end[-1]) { ++ fprintf(stderr, "Object has malformed BTF string data!\n"); ++ return -EINVAL; ++ } ++ ++ ctx->btf.strings = str_start; ++ return 0; ++} ++ ++static int bpf_btf_register_type(struct bpf_elf_ctx *ctx, ++ const struct btf_type *type) ++{ ++ int cur = ctx->btf.types_num, num = cur + 1; ++ const struct btf_type **types; ++ ++ types = realloc(ctx->btf.types, num * sizeof(type)); ++ if (!types) { ++ free(ctx->btf.types); ++ ctx->btf.types = NULL; ++ ctx->btf.types_num = 0; ++ return -ENOMEM; ++ } ++ ++ ctx->btf.types = types; ++ ctx->btf.types[cur] = type; ++ ctx->btf.types_num = num; ++ return 0; ++} ++ ++static struct btf_type btf_type_void; ++ ++static int bpf_btf_prep_type_data(struct bpf_elf_ctx *ctx) ++{ ++ const void *type_cur = ctx->btf.raw + ctx->btf.hdr->type_off; ++ const void *type_end = ctx->btf.raw + ctx->btf.hdr->str_off; ++ const struct btf_type *type; ++ uint16_t var_len; ++ int ret, kind; ++ ++ ret = bpf_btf_register_type(ctx, &btf_type_void); ++ if (ret < 0) ++ return ret; ++ ++ while (type_cur < type_end) { ++ type = type_cur; ++ type_cur += sizeof(*type); ++ ++ var_len = BTF_INFO_VLEN(type->info); ++ kind = BTF_INFO_KIND(type->info); ++ ++ switch (kind) { ++ case BTF_KIND_INT: ++ type_cur += sizeof(int); ++ break; ++ case BTF_KIND_ARRAY: ++ type_cur += sizeof(struct btf_array); ++ break; ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: ++ type_cur += var_len * sizeof(struct btf_member); ++ break; ++ case BTF_KIND_ENUM: ++ type_cur += var_len * sizeof(struct btf_enum); ++ break; ++ case BTF_KIND_TYPEDEF: ++ case BTF_KIND_PTR: ++ case BTF_KIND_FWD: ++ case BTF_KIND_VOLATILE: ++ case BTF_KIND_CONST: ++ case BTF_KIND_RESTRICT: ++ break; ++ default: ++ fprintf(stderr, "Object has unknown BTF type: %u!\n", kind); ++ 
return -EINVAL; ++ } ++ ++ ret = bpf_btf_register_type(ctx, type); ++ if (ret < 0) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int bpf_btf_prep_data(struct bpf_elf_ctx *ctx) ++{ ++ int ret = bpf_btf_check_header(ctx); ++ ++ if (!ret) ++ return bpf_btf_prep_type_data(ctx); ++ return ret; ++} ++ ++static void bpf_fetch_btf_end(struct bpf_elf_ctx *ctx) ++{ ++ int fd = bpf_btf_attach(ctx); ++ ++ if (fd < 0) ++ return; ++ ctx->btf_fd = fd; ++ if (bpf_btf_prep_data(ctx) < 0) { ++ close(ctx->btf_fd); ++ ctx->btf_fd = 0; ++ } ++} ++ + static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx) + { + return ctx->sym_tab && ctx->str_tab && ctx->sec_maps; + } + ++static bool bpf_has_btf_data(const struct bpf_elf_ctx *ctx) ++{ ++ return ctx->sec_btf; ++} ++ + static bool bpf_has_call_data(const struct bpf_elf_ctx *ctx) + { + return ctx->sec_text; +@@ -1952,6 +2267,9 @@ static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx, bool check_text_sec) + else if (data.sec_hdr.sh_type == SHT_STRTAB && + !strcmp(data.sec_name, ".strtab")) + ret = bpf_fetch_strtab(ctx, i, &data); ++ else if (data.sec_hdr.sh_type == SHT_PROGBITS && ++ !strcmp(data.sec_name, ".BTF")) ++ ret = bpf_fetch_btf_begin(ctx, i, &data); + if (ret < 0) { + fprintf(stderr, "Error parsing section %d! 
Perhaps check with readelf -a?\n", + i); +@@ -1959,6 +2277,8 @@ static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx, bool check_text_sec) + } + } + ++ if (bpf_has_btf_data(ctx)) ++ bpf_fetch_btf_end(ctx); + if (bpf_has_map_data(ctx)) { + ret = bpf_fetch_maps_end(ctx); + if (ret < 0) { +@@ -2596,6 +2916,10 @@ static void bpf_maps_teardown(struct bpf_elf_ctx *ctx) + if (ctx->map_fds[i]) + close(ctx->map_fds[i]); + } ++ ++ if (ctx->btf_fd) ++ close(ctx->btf_fd); ++ free(ctx->btf.types); + } + + static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure) +-- +2.20.1 + diff --git a/SOURCES/0060-bpf-check-map-symbol-type-properly-with-newer-llvm-c.patch b/SOURCES/0060-bpf-check-map-symbol-type-properly-with-newer-llvm-c.patch new file mode 100644 index 0000000..215fe51 --- /dev/null +++ b/SOURCES/0060-bpf-check-map-symbol-type-properly-with-newer-llvm-c.patch @@ -0,0 +1,113 @@ +From 9783e8b3de077c2e6399a9aa83f93237690bd744 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Thu, 13 Jun 2019 14:37:57 +0200 +Subject: [PATCH] bpf: check map symbol type properly with newer llvm compiler + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361 +Upstream Status: iproute2.git commit 7a04dd84a7f93 + +commit 7a04dd84a7f938f72fcef9efe8383314b0a66274 +Author: Yonghong Song +Date: Mon Oct 29 15:32:03 2018 -0700 + + bpf: check map symbol type properly with newer llvm compiler + + With llvm 7.0 or earlier, the map symbol type is STT_NOTYPE. + -bash-4.4$ cat t.c + __attribute__((section("maps"))) int g; + -bash-4.4$ clang -target bpf -O2 -c t.c + -bash-4.4$ readelf -s t.o + + Symbol table '.symtab' contains 2 entries: + Num: Value Size Type Bind Vis Ndx Name + 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND + 1: 0000000000000000 0 NOTYPE GLOBAL DEFAULT 3 g + + The following llvm commit enables BPF target to generate + proper symbol type and size. 
+ commit bf6ec206615b9718869d48b4e5400d0c6e3638dd + Author: Yonghong Song + Date: Wed Sep 19 16:04:13 2018 +0000 + + [bpf] Symbol sizes and types in object file + + Clang-compiled object files currently don't include the symbol sizes and + types. Some tools however need that information. For example, ctfconvert + uses that information to generate FreeBSD's CTF representation from ELF + files. + With this patch, symbol sizes and types are included in object files. + + Signed-off-by: Paul Chaignon + Reported-by: Yutaro Hayakawa + + Hence, for llvm 8.0.0 (currently trunk), symbol type will be not NOTYPE, but OBJECT. + -bash-4.4$ clang -target bpf -O2 -c t.c + -bash-4.4$ readelf -s t.o + + Symbol table '.symtab' contains 3 entries: + Num: Value Size Type Bind Vis Ndx Name + 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND + 1: 0000000000000000 0 FILE LOCAL DEFAULT ABS t.c + 2: 0000000000000000 4 OBJECT GLOBAL DEFAULT 3 g + + This patch makes sure bpf library accepts both NOTYPE and OBJECT types + of global map symbols. 
+ + Signed-off-by: Yonghong Song + Acked-by: Daniel Borkmann + Signed-off-by: Stephen Hemminger +--- + lib/bpf.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/lib/bpf.c b/lib/bpf.c +index d093d0bd86eae..45f279fa4a416 100644 +--- a/lib/bpf.c ++++ b/lib/bpf.c +@@ -1758,11 +1758,13 @@ static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which) + int i; + + for (i = 0; i < ctx->sym_num; i++) { ++ int type = GELF_ST_TYPE(sym.st_info); ++ + if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym) + continue; + + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || +- GELF_ST_TYPE(sym.st_info) != STT_NOTYPE || ++ (type != STT_NOTYPE && type != STT_OBJECT) || + sym.st_shndx != ctx->sec_maps || + sym.st_value / ctx->map_len != which) + continue; +@@ -1849,11 +1851,13 @@ static int bpf_map_num_sym(struct bpf_elf_ctx *ctx) + GElf_Sym sym; + + for (i = 0; i < ctx->sym_num; i++) { ++ int type = GELF_ST_TYPE(sym.st_info); ++ + if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym) + continue; + + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || +- GELF_ST_TYPE(sym.st_info) != STT_NOTYPE || ++ (type != STT_NOTYPE && type != STT_OBJECT) || + sym.st_shndx != ctx->sec_maps) + continue; + num++; +@@ -1927,10 +1931,12 @@ static int bpf_map_verify_all_offs(struct bpf_elf_ctx *ctx, int end) + * the table again. 
+ */ + for (i = 0; i < ctx->sym_num; i++) { ++ int type = GELF_ST_TYPE(sym.st_info); ++ + if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym) + continue; + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || +- GELF_ST_TYPE(sym.st_info) != STT_NOTYPE || ++ (type != STT_NOTYPE && type != STT_OBJECT) || + sym.st_shndx != ctx->sec_maps) + continue; + if (sym.st_value == off) +-- +2.20.1 + diff --git a/SOURCES/0061-Use-libbsd-for-strlcpy-if-available.patch b/SOURCES/0061-Use-libbsd-for-strlcpy-if-available.patch new file mode 100644 index 0000000..6cb418c --- /dev/null +++ b/SOURCES/0061-Use-libbsd-for-strlcpy-if-available.patch @@ -0,0 +1,258 @@ +From d3153cc39f5dca57e2cfc2faaefc690f64af398f Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Thu, 13 Jun 2019 14:37:57 +0200 +Subject: [PATCH] Use libbsd for strlcpy if available + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361 +Upstream Status: iproute2.git commit 508f3c231efb1 + +commit 508f3c231efb179fb842d222e8151b395937b136 +Author: Luca Boccassi +Date: Wed Oct 31 18:00:11 2018 +0000 + + Use libbsd for strlcpy if available + + If libc does not provide strlcpy check for libbsd with pkg-config to + avoid relying on inline version. 
+ + Signed-off-by: Luca Boccassi + Signed-off-by: Stephen Hemminger +--- + configure | 11 +++++++++-- + genl/ctrl.c | 3 +++ + ip/iplink.c | 3 +++ + ip/ipnetns.c | 3 +++ + ip/iproute_lwtunnel.c | 3 +++ + ip/ipvrf.c | 3 +++ + ip/ipxfrm.c | 3 +++ + ip/tunnel.c | 3 +++ + ip/xfrm_state.c | 3 +++ + lib/bpf.c | 3 +++ + lib/fs.c | 3 +++ + lib/inet_proto.c | 3 +++ + misc/ss.c | 3 +++ + tc/em_ipset.c | 3 +++ + tc/m_pedit.c | 3 +++ + 15 files changed, 51 insertions(+), 2 deletions(-) + +diff --git a/configure b/configure +index 5ef5cd4cf9cde..07c18f9bda4a2 100755 +--- a/configure ++++ b/configure +@@ -330,8 +330,15 @@ EOF + then + echo "no" + else +- echo 'CFLAGS += -DNEED_STRLCPY' >>$CONFIG +- echo "yes" ++ if ${PKG_CONFIG} libbsd --exists ++ then ++ echo 'CFLAGS += -DHAVE_LIBBSD' `${PKG_CONFIG} libbsd --cflags` >>$CONFIG ++ echo 'LDLIBS +=' `${PKG_CONFIG} libbsd --libs` >> $CONFIG ++ echo "no" ++ else ++ echo 'CFLAGS += -DNEED_STRLCPY' >>$CONFIG ++ echo "yes" ++ fi + fi + rm -f $TMPDIR/strtest.c $TMPDIR/strtest + } +diff --git a/genl/ctrl.c b/genl/ctrl.c +index 0d9c5f2517b78..4063ec0ba474b 100644 +--- a/genl/ctrl.c ++++ b/genl/ctrl.c +@@ -18,6 +18,9 @@ + #include + #include + #include ++#ifdef HAVE_LIBBSD ++#include ++#endif + + #include "utils.h" + #include "genl_utils.h" +diff --git a/ip/iplink.c b/ip/iplink.c +index 0ba5f1af76697..2f8f3bf1f84bb 100644 +--- a/ip/iplink.c ++++ b/ip/iplink.c +@@ -24,6 +24,9 @@ + #include + #include + #include ++#ifdef HAVE_LIBBSD ++#include ++#endif + #include + #include + #include +diff --git a/ip/ipnetns.c b/ip/ipnetns.c +index 368be0cbc0a48..5991592e947b6 100644 +--- a/ip/ipnetns.c ++++ b/ip/ipnetns.c +@@ -8,6 +8,9 @@ + #include + #include + #include ++#ifdef HAVE_LIBBSD ++#include ++#endif + #include + #include + #include +diff --git a/ip/iproute_lwtunnel.c b/ip/iproute_lwtunnel.c +index 388cd19a3ef0b..be9f60c3b2137 100644 +--- a/ip/iproute_lwtunnel.c ++++ b/ip/iproute_lwtunnel.c +@@ -16,6 +16,9 @@ + #include + #include + #include 
++#ifdef HAVE_LIBBSD ++#include ++#endif + #include + #include + #include +diff --git a/ip/ipvrf.c b/ip/ipvrf.c +index 8a6b7f977b142..8572b4f23e3dc 100644 +--- a/ip/ipvrf.c ++++ b/ip/ipvrf.c +@@ -21,6 +21,9 @@ + #include + #include + #include ++#ifdef HAVE_LIBBSD ++#include ++#endif + #include + #include + #include +diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c +index 12c2f721571b6..5304dfc1af906 100644 +--- a/ip/ipxfrm.c ++++ b/ip/ipxfrm.c +@@ -28,6 +28,9 @@ + #include + #include + #include ++#ifdef HAVE_LIBBSD ++#include ++#endif + #include + #include + #include +diff --git a/ip/tunnel.c b/ip/tunnel.c +index 79de7f2406f0e..d54505d483d22 100644 +--- a/ip/tunnel.c ++++ b/ip/tunnel.c +@@ -24,6 +24,9 @@ + + #include + #include ++#ifdef HAVE_LIBBSD ++#include ++#endif + #include + #include + #include +diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c +index 85d959cc4f44f..0c8df7e6e10cd 100644 +--- a/ip/xfrm_state.c ++++ b/ip/xfrm_state.c +@@ -27,6 +27,9 @@ + #include + #include + #include ++#ifdef HAVE_LIBBSD ++#include ++#endif + #include + #include "utils.h" + #include "xfrm.h" +diff --git a/lib/bpf.c b/lib/bpf.c +index 45f279fa4a416..35d7c45a2924d 100644 +--- a/lib/bpf.c ++++ b/lib/bpf.c +@@ -15,6 +15,9 @@ + #include + #include + #include ++#ifdef HAVE_LIBBSD ++#include ++#endif + #include + #include + #include +diff --git a/lib/fs.c b/lib/fs.c +index 86efd4ed2ed80..af36bea0987fa 100644 +--- a/lib/fs.c ++++ b/lib/fs.c +@@ -20,6 +20,9 @@ + #include + #include + #include ++#ifdef HAVE_LIBBSD ++#include ++#endif + #include + #include + +diff --git a/lib/inet_proto.c b/lib/inet_proto.c +index 0836a4c96a0b4..b379d8f8e720e 100644 +--- a/lib/inet_proto.c ++++ b/lib/inet_proto.c +@@ -18,6 +18,9 @@ + #include + #include + #include ++#ifdef HAVE_LIBBSD ++#include ++#endif + + #include "rt_names.h" + #include "utils.h" +diff --git a/misc/ss.c b/misc/ss.c +index 41e7762bb61f5..7e94f2c8d1baa 100644 +--- a/misc/ss.c ++++ b/misc/ss.c +@@ -18,6 +18,9 @@ + #include + #include + #include 
++#ifdef HAVE_LIBBSD ++#include ++#endif + #include + #include + #include +diff --git a/tc/em_ipset.c b/tc/em_ipset.c +index 48b287f5ba3b2..550b2101a0579 100644 +--- a/tc/em_ipset.c ++++ b/tc/em_ipset.c +@@ -20,6 +20,9 @@ + #include + #include + #include ++#ifdef HAVE_LIBBSD ++#include ++#endif + #include + #include + +diff --git a/tc/m_pedit.c b/tc/m_pedit.c +index 2aeb56d9615f1..baacc80dd94b7 100644 +--- a/tc/m_pedit.c ++++ b/tc/m_pedit.c +@@ -23,6 +23,9 @@ + #include + #include + #include ++#ifdef HAVE_LIBBSD ++#include ++#endif + #include + #include "utils.h" + #include "tc_util.h" +-- +2.20.1 + diff --git a/SOURCES/0062-Include-bsd-string.h-only-in-include-utils.h.patch b/SOURCES/0062-Include-bsd-string.h-only-in-include-utils.h.patch new file mode 100644 index 0000000..245f113 --- /dev/null +++ b/SOURCES/0062-Include-bsd-string.h-only-in-include-utils.h.patch @@ -0,0 +1,255 @@ +From f416b73a7f47494cf6d18cdaad5e86709bc43a63 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Thu, 13 Jun 2019 14:37:57 +0200 +Subject: [PATCH] Include bsd/string.h only in include/utils.h + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361 +Upstream Status: iproute2.git commit 6d2fd4a53f63b + +commit 6d2fd4a53f63bd20667b1a8f2ec8fde1fc3a54d4 +Author: Luca Boccassi +Date: Thu Nov 1 22:25:27 2018 +0000 + + Include bsd/string.h only in include/utils.h + + This is simpler and cleaner, and avoids having to include the header + from every file where the functions are used. The prototypes of the + internal implementation are in this header, so utils.h will have to be + included anyway for those. 
+ + Fixes: 508f3c231efb ("Use libbsd for strlcpy if available") + + Signed-off-by: Luca Boccassi + Signed-off-by: Stephen Hemminger +--- + genl/ctrl.c | 3 --- + include/utils.h | 4 ++++ + ip/iplink.c | 3 --- + ip/ipnetns.c | 3 --- + ip/iproute_lwtunnel.c | 3 --- + ip/ipvrf.c | 3 --- + ip/ipxfrm.c | 3 --- + ip/tunnel.c | 3 --- + ip/xfrm_state.c | 3 --- + lib/bpf.c | 3 --- + lib/fs.c | 3 --- + lib/inet_proto.c | 3 --- + misc/ss.c | 3 --- + tc/em_ipset.c | 3 --- + tc/m_pedit.c | 3 --- + 15 files changed, 4 insertions(+), 42 deletions(-) + +diff --git a/genl/ctrl.c b/genl/ctrl.c +index 4063ec0ba474b..0d9c5f2517b78 100644 +--- a/genl/ctrl.c ++++ b/genl/ctrl.c +@@ -18,9 +18,6 @@ + #include + #include + #include +-#ifdef HAVE_LIBBSD +-#include +-#endif + + #include "utils.h" + #include "genl_utils.h" +diff --git a/include/utils.h b/include/utils.h +index 8cb4349e8a89f..c32b37a1797d8 100644 +--- a/include/utils.h ++++ b/include/utils.h +@@ -9,6 +9,10 @@ + #include + #include + ++#ifdef HAVE_LIBBSD ++#include ++#endif ++ + #include "libnetlink.h" + #include "ll_map.h" + #include "rtm_map.h" +diff --git a/ip/iplink.c b/ip/iplink.c +index 2f8f3bf1f84bb..0ba5f1af76697 100644 +--- a/ip/iplink.c ++++ b/ip/iplink.c +@@ -24,9 +24,6 @@ + #include + #include + #include +-#ifdef HAVE_LIBBSD +-#include +-#endif + #include + #include + #include +diff --git a/ip/ipnetns.c b/ip/ipnetns.c +index 5991592e947b6..368be0cbc0a48 100644 +--- a/ip/ipnetns.c ++++ b/ip/ipnetns.c +@@ -8,9 +8,6 @@ + #include + #include + #include +-#ifdef HAVE_LIBBSD +-#include +-#endif + #include + #include + #include +diff --git a/ip/iproute_lwtunnel.c b/ip/iproute_lwtunnel.c +index be9f60c3b2137..388cd19a3ef0b 100644 +--- a/ip/iproute_lwtunnel.c ++++ b/ip/iproute_lwtunnel.c +@@ -16,9 +16,6 @@ + #include + #include + #include +-#ifdef HAVE_LIBBSD +-#include +-#endif + #include + #include + #include +diff --git a/ip/ipvrf.c b/ip/ipvrf.c +index 8572b4f23e3dc..8a6b7f977b142 100644 +--- a/ip/ipvrf.c ++++ b/ip/ipvrf.c 
+@@ -21,9 +21,6 @@ + #include + #include + #include +-#ifdef HAVE_LIBBSD +-#include +-#endif + #include + #include + #include +diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c +index 5304dfc1af906..12c2f721571b6 100644 +--- a/ip/ipxfrm.c ++++ b/ip/ipxfrm.c +@@ -28,9 +28,6 @@ + #include + #include + #include +-#ifdef HAVE_LIBBSD +-#include +-#endif + #include + #include + #include +diff --git a/ip/tunnel.c b/ip/tunnel.c +index d54505d483d22..79de7f2406f0e 100644 +--- a/ip/tunnel.c ++++ b/ip/tunnel.c +@@ -24,9 +24,6 @@ + + #include + #include +-#ifdef HAVE_LIBBSD +-#include +-#endif + #include + #include + #include +diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c +index 0c8df7e6e10cd..85d959cc4f44f 100644 +--- a/ip/xfrm_state.c ++++ b/ip/xfrm_state.c +@@ -27,9 +27,6 @@ + #include + #include + #include +-#ifdef HAVE_LIBBSD +-#include +-#endif + #include + #include "utils.h" + #include "xfrm.h" +diff --git a/lib/bpf.c b/lib/bpf.c +index 35d7c45a2924d..45f279fa4a416 100644 +--- a/lib/bpf.c ++++ b/lib/bpf.c +@@ -15,9 +15,6 @@ + #include + #include + #include +-#ifdef HAVE_LIBBSD +-#include +-#endif + #include + #include + #include +diff --git a/lib/fs.c b/lib/fs.c +index af36bea0987fa..86efd4ed2ed80 100644 +--- a/lib/fs.c ++++ b/lib/fs.c +@@ -20,9 +20,6 @@ + #include + #include + #include +-#ifdef HAVE_LIBBSD +-#include +-#endif + #include + #include + +diff --git a/lib/inet_proto.c b/lib/inet_proto.c +index b379d8f8e720e..0836a4c96a0b4 100644 +--- a/lib/inet_proto.c ++++ b/lib/inet_proto.c +@@ -18,9 +18,6 @@ + #include + #include + #include +-#ifdef HAVE_LIBBSD +-#include +-#endif + + #include "rt_names.h" + #include "utils.h" +diff --git a/misc/ss.c b/misc/ss.c +index 7e94f2c8d1baa..41e7762bb61f5 100644 +--- a/misc/ss.c ++++ b/misc/ss.c +@@ -18,9 +18,6 @@ + #include + #include + #include +-#ifdef HAVE_LIBBSD +-#include +-#endif + #include + #include + #include +diff --git a/tc/em_ipset.c b/tc/em_ipset.c +index 550b2101a0579..48b287f5ba3b2 100644 +--- a/tc/em_ipset.c ++++ 
b/tc/em_ipset.c +@@ -20,9 +20,6 @@ + #include + #include + #include +-#ifdef HAVE_LIBBSD +-#include +-#endif + #include + #include + +diff --git a/tc/m_pedit.c b/tc/m_pedit.c +index baacc80dd94b7..2aeb56d9615f1 100644 +--- a/tc/m_pedit.c ++++ b/tc/m_pedit.c +@@ -23,9 +23,6 @@ + #include + #include + #include +-#ifdef HAVE_LIBBSD +-#include +-#endif + #include + #include "utils.h" + #include "tc_util.h" +-- +2.20.1 + diff --git a/SOURCES/0063-bpf-initialise-map-symbol-before-retrieving-and-comp.patch b/SOURCES/0063-bpf-initialise-map-symbol-before-retrieving-and-comp.patch new file mode 100644 index 0000000..62985b9 --- /dev/null +++ b/SOURCES/0063-bpf-initialise-map-symbol-before-retrieving-and-comp.patch @@ -0,0 +1,90 @@ +From 9348ded117d05ba1d54a748173db009d473c707c Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Thu, 13 Jun 2019 14:37:57 +0200 +Subject: [PATCH] bpf: initialise map symbol before retrieving and comparing + its type + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361 +Upstream Status: iproute2.git commit 1a7d3ad8a5862 + +commit 1a7d3ad8a5862ca9ce9dd19326faeea77e5e6142 +Author: Quentin Monnet +Date: Tue Nov 20 01:26:27 2018 +0000 + + bpf: initialise map symbol before retrieving and comparing its type + + In order to compare BPF map symbol type correctly in regard to the + latest LLVM, commit 7a04dd84a7f9 ("bpf: check map symbol type properly + with newer llvm compiler") compares map symbol type to both NOTYPE and + OBJECT. To do so, it first retrieves the type from "sym.st_info" and + stores it into a temporary variable. + + However, the type is collected from the symbol "sym" before this latter + symbol is actually updated. gelf_getsym() is called after that and + updates "sym", and when comparison with OBJECT or NOTYPE happens it is + done on the type of the symbol collected in the previous passage of the + loop (or on an uninitialised symbol on the first passage). This may + eventually break map collection from the ELF file. 
+ + Fix this by assigning the type to the temporary variable only after the + call to gelf_getsym(). + + Fixes: 7a04dd84a7f9 ("bpf: check map symbol type properly with newer llvm compiler") + Reported-by: Ron Philip + Signed-off-by: Quentin Monnet + Reviewed-by: Jiong Wang + Acked-by: Yonghong Song + Signed-off-by: Stephen Hemminger +--- + lib/bpf.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/lib/bpf.c b/lib/bpf.c +index 45f279fa4a416..6aff8f7bad7fb 100644 +--- a/lib/bpf.c ++++ b/lib/bpf.c +@@ -1758,11 +1758,12 @@ static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which) + int i; + + for (i = 0; i < ctx->sym_num; i++) { +- int type = GELF_ST_TYPE(sym.st_info); ++ int type; + + if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym) + continue; + ++ type = GELF_ST_TYPE(sym.st_info); + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || + (type != STT_NOTYPE && type != STT_OBJECT) || + sym.st_shndx != ctx->sec_maps || +@@ -1851,11 +1852,12 @@ static int bpf_map_num_sym(struct bpf_elf_ctx *ctx) + GElf_Sym sym; + + for (i = 0; i < ctx->sym_num; i++) { +- int type = GELF_ST_TYPE(sym.st_info); ++ int type; + + if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym) + continue; + ++ type = GELF_ST_TYPE(sym.st_info); + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || + (type != STT_NOTYPE && type != STT_OBJECT) || + sym.st_shndx != ctx->sec_maps) +@@ -1931,10 +1933,12 @@ static int bpf_map_verify_all_offs(struct bpf_elf_ctx *ctx, int end) + * the table again. 
+ */ + for (i = 0; i < ctx->sym_num; i++) { +- int type = GELF_ST_TYPE(sym.st_info); ++ int type; + + if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym) + continue; ++ ++ type = GELF_ST_TYPE(sym.st_info); + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || + (type != STT_NOTYPE && type != STT_OBJECT) || + sym.st_shndx != ctx->sec_maps) +-- +2.20.1 + diff --git a/SOURCES/0064-lib-bpf-fix-build-warning-if-no-elf.patch b/SOURCES/0064-lib-bpf-fix-build-warning-if-no-elf.patch new file mode 100644 index 0000000..ea2e81e --- /dev/null +++ b/SOURCES/0064-lib-bpf-fix-build-warning-if-no-elf.patch @@ -0,0 +1,57 @@ +From ac8f163e0b2e14afdc8a1a1d449f1e5db07075ba Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Thu, 13 Jun 2019 14:37:57 +0200 +Subject: [PATCH] lib/bpf: fix build warning if no elf +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361 +Upstream Status: iproute2.git commit 33fde2b60081e + +commit 33fde2b60081ed9ac16f7dd81c48233803855689 +Author: Stephen Hemminger +Date: Mon Dec 10 13:50:17 2018 -0800 + + lib/bpf: fix build warning if no elf + + Function was not used unless HAVE_ELF causing: + + bpf.c:105:13: warning: ‘bpf_map_offload_neutral’ defined but not used [-Wunused-function] + + Signed-off-by: Stephen Hemminger +--- + lib/bpf.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/lib/bpf.c b/lib/bpf.c +index 6aff8f7bad7fb..5e85cfc0bdd5b 100644 +--- a/lib/bpf.c ++++ b/lib/bpf.c +@@ -102,11 +102,6 @@ static const struct bpf_prog_meta __bpf_prog_meta[] = { + }, + }; + +-static bool bpf_map_offload_neutral(enum bpf_map_type type) +-{ +- return type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; +-} +- + static const char *bpf_prog_to_subdir(enum bpf_prog_type type) + { + assert(type < ARRAY_SIZE(__bpf_prog_meta) && +@@ -1610,6 +1605,11 @@ static bool bpf_is_map_in_map_type(const struct bpf_elf_map *map) + map->type == 
BPF_MAP_TYPE_HASH_OF_MAPS; + } + ++static bool bpf_map_offload_neutral(enum bpf_map_type type) ++{ ++ return type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; ++} ++ + static int bpf_map_attach(const char *name, struct bpf_elf_ctx *ctx, + const struct bpf_elf_map *map, struct bpf_map_ext *ext, + int *have_map_in_map) +-- +2.20.1 + diff --git a/SOURCES/0065-bpf-add-btf-func-and-func_proto-kind-support.patch b/SOURCES/0065-bpf-add-btf-func-and-func_proto-kind-support.patch new file mode 100644 index 0000000..9f515c3 --- /dev/null +++ b/SOURCES/0065-bpf-add-btf-func-and-func_proto-kind-support.patch @@ -0,0 +1,61 @@ +From 5c940644dfc632f1270f39ee909e1abb877ff081 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Thu, 13 Jun 2019 14:37:57 +0200 +Subject: [PATCH] bpf: add btf func and func_proto kind support + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361 +Upstream Status: iproute2.git commit 3da6d055d93fe + +commit 3da6d055d93fefe40bf88a9bc37b4ce3433696ee +Author: Yonghong Song +Date: Thu Jan 24 16:41:07 2019 -0800 + + bpf: add btf func and func_proto kind support + + The issue is discovered for bpf selftest test_skb_cgroup.sh. + Currently we have, + $ ./test_skb_cgroup_id.sh + Wait for testing link-local IP to become available ... OK + Object has unknown BTF type: 13! + [PASS] + + In the above the BTF type 13 refers to BTF kind + BTF_KIND_FUNC_PROTO. + This patch added support of BTF_KIND_FUNC_PROTO and + BTF_KIND_FUNC during type parsing. + With this patch, I got + $ ./test_skb_cgroup_id.sh + Wait for testing link-local IP to become available ... 
OK + [PASS] + + Signed-off-by: Yonghong Song + Acked-by: Daniel Borkmann + Signed-off-by: Stephen Hemminger +--- + lib/bpf.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/lib/bpf.c b/lib/bpf.c +index 5e85cfc0bdd5b..762f8857453ff 100644 +--- a/lib/bpf.c ++++ b/lib/bpf.c +@@ -2193,12 +2193,16 @@ static int bpf_btf_prep_type_data(struct bpf_elf_ctx *ctx) + case BTF_KIND_ENUM: + type_cur += var_len * sizeof(struct btf_enum); + break; ++ case BTF_KIND_FUNC_PROTO: ++ type_cur += var_len * sizeof(struct btf_param); ++ break; + case BTF_KIND_TYPEDEF: + case BTF_KIND_PTR: + case BTF_KIND_FWD: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: ++ case BTF_KIND_FUNC: + break; + default: + fprintf(stderr, "Object has unknown BTF type: %u!\n", kind); +-- +2.20.1 + diff --git a/SOURCES/0066-uapi-update-headers-to-4.20-rc1.patch b/SOURCES/0066-uapi-update-headers-to-4.20-rc1.patch new file mode 100644 index 0000000..9d67465 --- /dev/null +++ b/SOURCES/0066-uapi-update-headers-to-4.20-rc1.patch @@ -0,0 +1,282 @@ +From 3caa0fed6aa58a8f7a05486f98572878a8ad5b30 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Thu, 13 Jun 2019 14:37:57 +0200 +Subject: [PATCH] uapi: update headers to 4.20-rc1 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361 +Upstream Status: iproute2.git commit 39776a8665f2d + +commit 39776a8665f2db1255ebed1f7cc992f69437bc36 +Author: Stephen Hemminger +Date: Mon Nov 5 08:37:41 2018 -0800 + + uapi: update headers to 4.20-rc1 + + Signed-off-by: Stephen Hemminger +--- + include/uapi/linux/bpf.h | 142 ++++++++++++++++++++++++++++++++++- + include/uapi/linux/elf-em.h | 1 + + include/uapi/linux/if_link.h | 1 + + include/uapi/linux/magic.h | 1 + + include/uapi/linux/netlink.h | 1 + + include/uapi/linux/sctp.h | 1 + + 6 files changed, 145 insertions(+), 2 deletions(-) + +diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h +index abb7f7748c2de..2bbe33db8aefa 100644 +--- a/include/uapi/linux/bpf.h ++++ 
b/include/uapi/linux/bpf.h +@@ -103,6 +103,7 @@ enum bpf_cmd { + BPF_BTF_LOAD, + BPF_BTF_GET_FD_BY_ID, + BPF_TASK_FD_QUERY, ++ BPF_MAP_LOOKUP_AND_DELETE_ELEM, + }; + + enum bpf_map_type { +@@ -127,6 +128,9 @@ enum bpf_map_type { + BPF_MAP_TYPE_SOCKHASH, + BPF_MAP_TYPE_CGROUP_STORAGE, + BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, ++ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, ++ BPF_MAP_TYPE_QUEUE, ++ BPF_MAP_TYPE_STACK, + }; + + enum bpf_prog_type { +@@ -461,6 +465,28 @@ union bpf_attr { + * Return + * 0 on success, or a negative error in case of failure. + * ++ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) ++ * Description ++ * Push an element *value* in *map*. *flags* is one of: ++ * ++ * **BPF_EXIST** ++ * If the queue/stack is full, the oldest element is removed to ++ * make room for this. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_map_pop_elem(struct bpf_map *map, void *value) ++ * Description ++ * Pop an element from *map*. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_map_peek_elem(struct bpf_map *map, void *value) ++ * Description ++ * Get an element from *map* without removing it. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * + * int bpf_probe_read(void *dst, u32 size, const void *src) + * Description + * For tracing programs, safely attempt to read *size* bytes from +@@ -1432,7 +1458,7 @@ union bpf_attr { + * Return + * 0 on success, or a negative error in case of failure. + * +- * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags) ++ * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) + * Description + * Grow or shrink the room for data in the packet associated to + * *skb* by *len_diff*, and according to the selected *mode*. +@@ -2143,6 +2169,94 @@ union bpf_attr { + * request in the skb. + * Return + * 0 on success, or a negative error in case of failure. 
++ * ++ * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) ++ * Description ++ * Look for TCP socket matching *tuple*, optionally in a child ++ * network namespace *netns*. The return value must be checked, ++ * and if non-NULL, released via **bpf_sk_release**\ (). ++ * ++ * The *ctx* should point to the context of the program, such as ++ * the skb or socket (depending on the hook in use). This is used ++ * to determine the base network namespace for the lookup. ++ * ++ * *tuple_size* must be one of: ++ * ++ * **sizeof**\ (*tuple*\ **->ipv4**) ++ * Look for an IPv4 socket. ++ * **sizeof**\ (*tuple*\ **->ipv6**) ++ * Look for an IPv6 socket. ++ * ++ * If the *netns* is zero, then the socket lookup table in the ++ * netns associated with the *ctx* will be used. For the TC hooks, ++ * this in the netns of the device in the skb. For socket hooks, ++ * this in the netns of the socket. If *netns* is non-zero, then ++ * it specifies the ID of the netns relative to the netns ++ * associated with the *ctx*. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_NET** configuration option. ++ * Return ++ * Pointer to *struct bpf_sock*, or NULL in case of failure. ++ * ++ * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) ++ * Description ++ * Look for UDP socket matching *tuple*, optionally in a child ++ * network namespace *netns*. The return value must be checked, ++ * and if non-NULL, released via **bpf_sk_release**\ (). ++ * ++ * The *ctx* should point to the context of the program, such as ++ * the skb or socket (depending on the hook in use). This is used ++ * to determine the base network namespace for the lookup. 
++ * ++ * *tuple_size* must be one of: ++ * ++ * **sizeof**\ (*tuple*\ **->ipv4**) ++ * Look for an IPv4 socket. ++ * **sizeof**\ (*tuple*\ **->ipv6**) ++ * Look for an IPv6 socket. ++ * ++ * If the *netns* is zero, then the socket lookup table in the ++ * netns associated with the *ctx* will be used. For the TC hooks, ++ * this in the netns of the device in the skb. For socket hooks, ++ * this in the netns of the socket. If *netns* is non-zero, then ++ * it specifies the ID of the netns relative to the netns ++ * associated with the *ctx*. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_NET** configuration option. ++ * Return ++ * Pointer to *struct bpf_sock*, or NULL in case of failure. ++ * ++ * int bpf_sk_release(struct bpf_sock *sk) ++ * Description ++ * Release the reference held by *sock*. *sock* must be a non-NULL ++ * pointer that was returned from bpf_sk_lookup_xxx\ (). ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) ++ * Description ++ * For socket policies, insert *len* bytes into msg at offset ++ * *start*. ++ * ++ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a ++ * *msg* it may want to insert metadata or options into the msg. ++ * This can later be read and used by any of the lower layer BPF ++ * hooks. ++ * ++ * This helper may fail if under memory pressure (a malloc ++ * fails) in these cases BPF programs will get an appropriate ++ * error and BPF programs will need to handle them. ++ * ++ * Return ++ * 0 on success, or a negative error in case of failure. 
+ */ + #define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ +@@ -2228,7 +2342,14 @@ union bpf_attr { + FN(get_current_cgroup_id), \ + FN(get_local_storage), \ + FN(sk_select_reuseport), \ +- FN(skb_ancestor_cgroup_id), ++ FN(skb_ancestor_cgroup_id), \ ++ FN(sk_lookup_tcp), \ ++ FN(sk_lookup_udp), \ ++ FN(sk_release), \ ++ FN(map_push_elem), \ ++ FN(map_pop_elem), \ ++ FN(map_peek_elem), \ ++ FN(msg_push_data), + + /* integer value in 'imm' field of BPF_CALL instruction selects which helper + * function eBPF program intends to call +@@ -2398,6 +2519,23 @@ struct bpf_sock { + */ + }; + ++struct bpf_sock_tuple { ++ union { ++ struct { ++ __be32 saddr; ++ __be32 daddr; ++ __be16 sport; ++ __be16 dport; ++ } ipv4; ++ struct { ++ __be32 saddr[4]; ++ __be32 daddr[4]; ++ __be16 sport; ++ __be16 dport; ++ } ipv6; ++ }; ++}; ++ + #define XDP_PACKET_HEADROOM 256 + + /* User return codes for XDP prog type. +diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h +index 31aa101783351..93722e60204c6 100644 +--- a/include/uapi/linux/elf-em.h ++++ b/include/uapi/linux/elf-em.h +@@ -41,6 +41,7 @@ + #define EM_TILEPRO 188 /* Tilera TILEPro */ + #define EM_MICROBLAZE 189 /* Xilinx MicroBlaze */ + #define EM_TILEGX 191 /* Tilera TILE-Gx */ ++#define EM_RISCV 243 /* RISC-V */ + #define EM_BPF 247 /* Linux BPF - in-kernel virtual machine */ + #define EM_FRV 0x5441 /* Fujitsu FR-V */ + +diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h +index 9c254603ebdad..52e95197e0790 100644 +--- a/include/uapi/linux/if_link.h ++++ b/include/uapi/linux/if_link.h +@@ -285,6 +285,7 @@ enum { + IFLA_BR_MCAST_STATS_ENABLED, + IFLA_BR_MCAST_IGMP_VERSION, + IFLA_BR_MCAST_MLD_VERSION, ++ IFLA_BR_VLAN_STATS_PER_PORT, + __IFLA_BR_MAX, + }; + +diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h +index 1a6fee974116a..96c24478d8ced 100644 +--- a/include/uapi/linux/magic.h ++++ b/include/uapi/linux/magic.h +@@ -29,6 +29,7 @@ + #define HPFS_SUPER_MAGIC 0xf995e849 + 
#define ISOFS_SUPER_MAGIC 0x9660 + #define JFFS2_SUPER_MAGIC 0x72b6 ++#define XFS_SUPER_MAGIC 0x58465342 /* "XFSB" */ + #define PSTOREFS_MAGIC 0x6165676C + #define EFIVARFS_MAGIC 0xde5e81e4 + #define HOSTFS_SUPER_MAGIC 0x00c0ffee +diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h +index 0b2c29bd081fa..2966171b9b95c 100644 +--- a/include/uapi/linux/netlink.h ++++ b/include/uapi/linux/netlink.h +@@ -153,6 +153,7 @@ enum nlmsgerr_attrs { + #define NETLINK_LIST_MEMBERSHIPS 9 + #define NETLINK_CAP_ACK 10 + #define NETLINK_EXT_ACK 11 ++#define NETLINK_DUMP_STRICT_CHK 12 + + struct nl_pktinfo { + __u32 group; +diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h +index dd164d7f4f41a..626480b68fb59 100644 +--- a/include/uapi/linux/sctp.h ++++ b/include/uapi/linux/sctp.h +@@ -301,6 +301,7 @@ enum sctp_sinfo_flags { + SCTP_SACK_IMMEDIATELY = (1 << 3), /* SACK should be sent without delay. */ + /* 2 bits here have been used by SCTP_PR_SCTP_MASK */ + SCTP_SENDALL = (1 << 6), ++ SCTP_PR_SCTP_ALL = (1 << 7), + SCTP_NOTIFICATION = MSG_NOTIFICATION, /* Next message is not user msg but notification. */ + SCTP_EOF = MSG_FIN, /* Initiate graceful shutdown process. 
*/ + }; +-- +2.20.1 + diff --git a/SOURCES/0067-uapi-update-bpf-header.patch b/SOURCES/0067-uapi-update-bpf-header.patch new file mode 100644 index 0000000..22c447c --- /dev/null +++ b/SOURCES/0067-uapi-update-bpf-header.patch @@ -0,0 +1,155 @@ +From 415044d7e6f956daec990a7ae358f9f324bd2dcd Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Thu, 13 Jun 2019 14:37:57 +0200 +Subject: [PATCH] uapi: update bpf header + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361 +Upstream Status: iproute2.git commit 028766aed21a4 + +commit 028766aed21a4d8eb2e60c9ef667f75f9354a104 +Author: Stephen Hemminger +Date: Mon Dec 10 09:22:23 2018 -0800 + + uapi: update bpf header + + Changes from 4.20-rc6 + + Signed-off-by: Stephen Hemminger +--- + include/uapi/linux/bpf.h | 56 ++++++++++++++++++++++++++-------------- + 1 file changed, 37 insertions(+), 19 deletions(-) + +diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h +index 2bbe33db8aefa..ff651ca661308 100644 +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -2170,7 +2170,7 @@ union bpf_attr { + * Return + * 0 on success, or a negative error in case of failure. + * +- * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) ++ * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) + * Description + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, +@@ -2187,12 +2187,14 @@ union bpf_attr { + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * +- * If the *netns* is zero, then the socket lookup table in the +- * netns associated with the *ctx* will be used. For the TC hooks, +- * this in the netns of the device in the skb. For socket hooks, +- * this in the netns of the socket. 
If *netns* is non-zero, then +- * it specifies the ID of the netns relative to the netns +- * associated with the *ctx*. ++ * If the *netns* is a negative signed 32-bit integer, then the ++ * socket lookup table in the netns associated with the *ctx* will ++ * will be used. For the TC hooks, this is the netns of the device ++ * in the skb. For socket hooks, this is the netns of the socket. ++ * If *netns* is any other signed 32-bit value greater than or ++ * equal to zero then it specifies the ID of the netns relative to ++ * the netns associated with the *ctx*. *netns* values beyond the ++ * range of 32-bit integers are reserved for future use. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. +@@ -2201,8 +2203,10 @@ union bpf_attr { + * **CONFIG_NET** configuration option. + * Return + * Pointer to *struct bpf_sock*, or NULL in case of failure. ++ * For sockets with reuseport option, the *struct bpf_sock* ++ * result is from reuse->socks[] using the hash of the tuple. + * +- * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) ++ * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) + * Description + * Look for UDP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, +@@ -2219,12 +2223,14 @@ union bpf_attr { + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * +- * If the *netns* is zero, then the socket lookup table in the +- * netns associated with the *ctx* will be used. For the TC hooks, +- * this in the netns of the device in the skb. For socket hooks, +- * this in the netns of the socket. If *netns* is non-zero, then +- * it specifies the ID of the netns relative to the netns +- * associated with the *ctx*. 
++ * If the *netns* is a negative signed 32-bit integer, then the ++ * socket lookup table in the netns associated with the *ctx* will ++ * will be used. For the TC hooks, this is the netns of the device ++ * in the skb. For socket hooks, this is the netns of the socket. ++ * If *netns* is any other signed 32-bit value greater than or ++ * equal to zero then it specifies the ID of the netns relative to ++ * the netns associated with the *ctx*. *netns* values beyond the ++ * range of 32-bit integers are reserved for future use. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. +@@ -2233,6 +2239,8 @@ union bpf_attr { + * **CONFIG_NET** configuration option. + * Return + * Pointer to *struct bpf_sock*, or NULL in case of failure. ++ * For sockets with reuseport option, the *struct bpf_sock* ++ * result is from reuse->socks[] using the hash of the tuple. + * + * int bpf_sk_release(struct bpf_sock *sk) + * Description +@@ -2405,6 +2413,9 @@ enum bpf_func_id { + /* BPF_FUNC_perf_event_output for sk_buff input context. */ + #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) + ++/* Current network namespace */ ++#define BPF_F_CURRENT_NETNS (-1L) ++ + /* Mode for BPF_FUNC_skb_adjust_room helper. */ + enum bpf_adj_room_mode { + BPF_ADJ_ROOM_NET, +@@ -2422,6 +2433,12 @@ enum bpf_lwt_encap_mode { + BPF_LWT_ENCAP_SEG6_INLINE + }; + ++#define __bpf_md_ptr(type, name) \ ++union { \ ++ type name; \ ++ __u64 :64; \ ++} __attribute__((aligned(8))) ++ + /* user accessible mirror of in-kernel sk_buff. + * new fields can only be added to the end of this structure + */ +@@ -2456,7 +2473,7 @@ struct __sk_buff { + /* ... here. 
*/ + + __u32 data_meta; +- struct bpf_flow_keys *flow_keys; ++ __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); + }; + + struct bpf_tunnel_key { +@@ -2572,8 +2589,8 @@ enum sk_action { + * be added to the end of this structure + */ + struct sk_msg_md { +- void *data; +- void *data_end; ++ __bpf_md_ptr(void *, data); ++ __bpf_md_ptr(void *, data_end); + + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ +@@ -2589,8 +2606,9 @@ struct sk_reuseport_md { + * Start of directly accessible data. It begins from + * the tcp/udp header. + */ +- void *data; +- void *data_end; /* End of directly accessible data */ ++ __bpf_md_ptr(void *, data); ++ /* End of directly accessible data */ ++ __bpf_md_ptr(void *, data_end); + /* + * Total length of packet (starting from the tcp/udp header). + * Note that the directly accessible bytes (data_end - data) +-- +2.20.1 + diff --git a/SOURCES/0068-Update-kernel-headers.patch b/SOURCES/0068-Update-kernel-headers.patch new file mode 100644 index 0000000..741afb5 --- /dev/null +++ b/SOURCES/0068-Update-kernel-headers.patch @@ -0,0 +1,492 @@ +From 356758b3303ab24b6fe8dccf94ed98ed7cbad224 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Thu, 13 Jun 2019 14:37:57 +0200 +Subject: [PATCH] Update kernel headers + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361 +Upstream Status: iproute2.git commit 17689d3075c8b + +commit 17689d3075c8b9a29b8f398a57defb9dcabafe81 +Author: David Ahern +Date: Wed Dec 19 12:47:29 2018 -0800 + + Update kernel headers + + Update kernel headers to commit + 055722716c39 ("tipc: fix uninitialized value for broadcast retransmission") + + Signed-off-by: David Ahern +--- + include/uapi/linux/bpf.h | 175 +++++++++++++++++++++-------- + include/uapi/linux/btf.h | 18 ++- + include/uapi/linux/if_bridge.h | 21 ++++ + include/uapi/linux/if_link.h | 1 + + include/uapi/linux/if_tun.h | 1 + + include/uapi/linux/neighbour.h | 1 + + include/uapi/linux/net_namespace.h | 2 + + include/uapi/linux/snmp.h 
| 1 + + 8 files changed, 171 insertions(+), 49 deletions(-) + +diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h +index ff651ca661308..960a7f0a4d99b 100644 +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -232,6 +232,20 @@ enum bpf_attach_type { + */ + #define BPF_F_STRICT_ALIGNMENT (1U << 0) + ++/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the ++ * verifier will allow any alignment whatsoever. On platforms ++ * with strict alignment requirements for loads ands stores (such ++ * as sparc and mips) the verifier validates that all loads and ++ * stores provably follow this requirement. This flag turns that ++ * checking and enforcement off. ++ * ++ * It is mostly used for testing when we want to validate the ++ * context and memory access aspects of the verifier, but because ++ * of an unaligned access the alignment check would trigger before ++ * the one we are interested in. ++ */ ++#define BPF_F_ANY_ALIGNMENT (1U << 1) ++ + /* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ + #define BPF_PSEUDO_MAP_FD 1 + +@@ -257,9 +271,6 @@ enum bpf_attach_type { + /* Specify numa node during map creation */ + #define BPF_F_NUMA_NODE (1U << 2) + +-/* flags for BPF_PROG_QUERY */ +-#define BPF_F_QUERY_EFFECTIVE (1U << 0) +- + #define BPF_OBJ_NAME_LEN 16U + + /* Flags for accessing BPF object */ +@@ -269,6 +280,12 @@ enum bpf_attach_type { + /* Flag for stack_map, store build_id+offset instead of pointer */ + #define BPF_F_STACK_BUILD_ID (1U << 5) + ++/* Zero-initialize hash function seed. This should only be used for testing. */ ++#define BPF_F_ZERO_SEED (1U << 6) ++ ++/* flags for BPF_PROG_QUERY */ ++#define BPF_F_QUERY_EFFECTIVE (1U << 0) ++ + enum bpf_stack_build_id_status { + /* user space need an empty entry to identify end of a trace */ + BPF_STACK_BUILD_ID_EMPTY = 0, +@@ -335,6 +352,13 @@ union bpf_attr { + * (context accesses, allowed helpers, etc). 
+ */ + __u32 expected_attach_type; ++ __u32 prog_btf_fd; /* fd pointing to BTF type data */ ++ __u32 func_info_rec_size; /* userspace bpf_func_info size */ ++ __aligned_u64 func_info; /* func info */ ++ __u32 func_info_cnt; /* number of bpf_func_info records */ ++ __u32 line_info_rec_size; /* userspace bpf_line_info size */ ++ __aligned_u64 line_info; /* line info */ ++ __u32 line_info_cnt; /* number of bpf_line_info records */ + }; + + struct { /* anonymous struct used by BPF_OBJ_* commands */ +@@ -353,8 +377,11 @@ union bpf_attr { + struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ + __u32 prog_fd; + __u32 retval; +- __u32 data_size_in; +- __u32 data_size_out; ++ __u32 data_size_in; /* input: len of data_in */ ++ __u32 data_size_out; /* input/output: len of data_out ++ * returns ENOSPC if data_out ++ * is too small. ++ */ + __aligned_u64 data_in; + __aligned_u64 data_out; + __u32 repeat; +@@ -475,18 +502,6 @@ union bpf_attr { + * Return + * 0 on success, or a negative error in case of failure. + * +- * int bpf_map_pop_elem(struct bpf_map *map, void *value) +- * Description +- * Pop an element from *map*. +- * Return +- * 0 on success, or a negative error in case of failure. +- * +- * int bpf_map_peek_elem(struct bpf_map *map, void *value) +- * Description +- * Get an element from *map* without removing it. +- * Return +- * 0 on success, or a negative error in case of failure. +- * + * int bpf_probe_read(void *dst, u32 size, const void *src) + * Description + * For tracing programs, safely attempt to read *size* bytes from +@@ -1910,9 +1925,9 @@ union bpf_attr { + * is set to metric from route (IPv4/IPv6 only), and ifindex + * is set to the device index of the nexthop from the FIB lookup. + * +- * *plen* argument is the size of the passed in struct. +- * *flags* argument can be a combination of one or more of the +- * following values: ++ * *plen* argument is the size of the passed in struct. 
++ * *flags* argument can be a combination of one or more of the ++ * following values: + * + * **BPF_FIB_LOOKUP_DIRECT** + * Do a direct table lookup vs full lookup using FIB +@@ -1921,9 +1936,9 @@ union bpf_attr { + * Perform lookup from an egress perspective (default is + * ingress). + * +- * *ctx* is either **struct xdp_md** for XDP programs or +- * **struct sk_buff** tc cls_act programs. +- * Return ++ * *ctx* is either **struct xdp_md** for XDP programs or ++ * **struct sk_buff** tc cls_act programs. ++ * Return + * * < 0 if any input argument is invalid + * * 0 on success (packet is forwarded, nexthop neighbor exists) + * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the +@@ -2068,8 +2083,8 @@ union bpf_attr { + * translated to a keycode using the rc keymap, and reported as + * an input key down event. After a period a key up event is + * generated. This period can be extended by calling either +- * **bpf_rc_keydown** () again with the same values, or calling +- * **bpf_rc_repeat** (). ++ * **bpf_rc_keydown**\ () again with the same values, or calling ++ * **bpf_rc_repeat**\ (). + * + * Some protocols include a toggle bit, in case the button was + * released and pressed again between consecutive scancodes. +@@ -2152,21 +2167,22 @@ union bpf_attr { + * The *flags* meaning is specific for each map type, + * and has to be 0 for cgroup local storage. + * +- * Depending on the bpf program type, a local storage area +- * can be shared between multiple instances of the bpf program, ++ * Depending on the BPF program type, a local storage area ++ * can be shared between multiple instances of the BPF program, + * running simultaneously. + * + * A user should care about the synchronization by himself. +- * For example, by using the BPF_STX_XADD instruction to alter ++ * For example, by using the **BPF_STX_XADD** instruction to alter + * the shared data. + * Return +- * Pointer to the local storage area. ++ * A pointer to the local storage area. 
+ * + * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) + * Description +- * Select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY map +- * It checks the selected sk is matching the incoming +- * request in the skb. ++ * Select a **SO_REUSEPORT** socket from a ++ * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. ++ * It checks the selected socket is matching the incoming ++ * request in the socket buffer. + * Return + * 0 on success, or a negative error in case of failure. + * +@@ -2174,7 +2190,7 @@ union bpf_attr { + * Description + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, +- * and if non-NULL, released via **bpf_sk_release**\ (). ++ * and if non-**NULL**, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used +@@ -2202,15 +2218,15 @@ union bpf_attr { + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return +- * Pointer to *struct bpf_sock*, or NULL in case of failure. +- * For sockets with reuseport option, the *struct bpf_sock* +- * result is from reuse->socks[] using the hash of the tuple. ++ * Pointer to **struct bpf_sock**, or **NULL** in case of failure. ++ * For sockets with reuseport option, the **struct bpf_sock** ++ * result is from **reuse->socks**\ [] using the hash of the tuple. + * + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) + * Description + * Look for UDP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, +- * and if non-NULL, released via **bpf_sk_release**\ (). ++ * and if non-**NULL**, released via **bpf_sk_release**\ (). 
+ * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used +@@ -2238,33 +2254,71 @@ union bpf_attr { + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return +- * Pointer to *struct bpf_sock*, or NULL in case of failure. +- * For sockets with reuseport option, the *struct bpf_sock* +- * result is from reuse->socks[] using the hash of the tuple. ++ * Pointer to **struct bpf_sock**, or **NULL** in case of failure. ++ * For sockets with reuseport option, the **struct bpf_sock** ++ * result is from **reuse->socks**\ [] using the hash of the tuple. + * +- * int bpf_sk_release(struct bpf_sock *sk) ++ * int bpf_sk_release(struct bpf_sock *sock) + * Description +- * Release the reference held by *sock*. *sock* must be a non-NULL +- * pointer that was returned from bpf_sk_lookup_xxx\ (). ++ * Release the reference held by *sock*. *sock* must be a ++ * non-**NULL** pointer that was returned from ++ * **bpf_sk_lookup_xxx**\ (). + * Return + * 0 on success, or a negative error in case of failure. + * ++ * int bpf_map_pop_elem(struct bpf_map *map, void *value) ++ * Description ++ * Pop an element from *map*. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_map_peek_elem(struct bpf_map *map, void *value) ++ * Description ++ * Get an element from *map* without removing it. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * + * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) + * Description +- * For socket policies, insert *len* bytes into msg at offset ++ * For socket policies, insert *len* bytes into *msg* at offset + * *start*. + * + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a +- * *msg* it may want to insert metadata or options into the msg. ++ * *msg* it may want to insert metadata or options into the *msg*. 
+ * This can later be read and used by any of the lower layer BPF + * hooks. + * + * This helper may fail if under memory pressure (a malloc + * fails) in these cases BPF programs will get an appropriate + * error and BPF programs will need to handle them. ++ * Return ++ * 0 on success, or a negative error in case of failure. + * ++ * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) ++ * Description ++ * Will remove *pop* bytes from a *msg* starting at byte *start*. ++ * This may result in **ENOMEM** errors under certain situations if ++ * an allocation and copy are required due to a full ring buffer. ++ * However, the helper will try to avoid doing the allocation ++ * if possible. Other errors can occur if input parameters are ++ * invalid either due to *start* byte not being valid part of *msg* ++ * payload and/or *pop* value being to large. + * Return + * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) ++ * Description ++ * This helper is used in programs implementing IR decoding, to ++ * report a successfully decoded pointer movement. ++ * ++ * The *ctx* should point to the lirc sample as passed into ++ * the program. ++ * ++ * This helper is only available is the kernel was compiled with ++ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to ++ * "**y**". 
++ * Return ++ * 0 + */ + #define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ +@@ -2357,7 +2411,9 @@ union bpf_attr { + FN(map_push_elem), \ + FN(map_pop_elem), \ + FN(map_peek_elem), \ +- FN(msg_push_data), ++ FN(msg_push_data), \ ++ FN(msg_pop_data), \ ++ FN(rc_pointer_rel), + + /* integer value in 'imm' field of BPF_CALL instruction selects which helper + * function eBPF program intends to call +@@ -2474,6 +2530,8 @@ struct __sk_buff { + + __u32 data_meta; + __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); ++ __u64 tstamp; ++ __u32 wire_len; + }; + + struct bpf_tunnel_key { +@@ -2649,6 +2707,16 @@ struct bpf_prog_info { + __u32 nr_jited_func_lens; + __aligned_u64 jited_ksyms; + __aligned_u64 jited_func_lens; ++ __u32 btf_id; ++ __u32 func_info_rec_size; ++ __aligned_u64 func_info; ++ __u32 nr_func_info; ++ __u32 nr_line_info; ++ __aligned_u64 line_info; ++ __aligned_u64 jited_line_info; ++ __u32 nr_jited_line_info; ++ __u32 line_info_rec_size; ++ __u32 jited_line_info_rec_size; + } __attribute__((aligned(8))); + + struct bpf_map_info { +@@ -2960,4 +3028,19 @@ struct bpf_flow_keys { + }; + }; + ++struct bpf_func_info { ++ __u32 insn_off; ++ __u32 type_id; ++}; ++ ++#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10) ++#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff) ++ ++struct bpf_line_info { ++ __u32 insn_off; ++ __u32 file_name_off; ++ __u32 line_off; ++ __u32 line_col; ++}; ++ + #endif /* __LINUX_BPF_H__ */ +diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h +index 8d2a8ffad56f9..f43d5a8e13d31 100644 +--- a/include/uapi/linux/btf.h ++++ b/include/uapi/linux/btf.h +@@ -40,7 +40,8 @@ struct btf_type { + /* "size" is used by INT, ENUM, STRUCT and UNION. + * "size" tells the size of the type it is describing. + * +- * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT. ++ * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, ++ * FUNC and FUNC_PROTO. + * "type" is a type_id referring to another type. 
+ */ + union { +@@ -64,8 +65,10 @@ struct btf_type { + #define BTF_KIND_VOLATILE 9 /* Volatile */ + #define BTF_KIND_CONST 10 /* Const */ + #define BTF_KIND_RESTRICT 11 /* Restrict */ +-#define BTF_KIND_MAX 11 +-#define NR_BTF_KINDS 12 ++#define BTF_KIND_FUNC 12 /* Function */ ++#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ ++#define BTF_KIND_MAX 13 ++#define NR_BTF_KINDS 14 + + /* For some specific BTF_KIND, "struct btf_type" is immediately + * followed by extra data. +@@ -110,4 +113,13 @@ struct btf_member { + __u32 offset; /* offset in bits */ + }; + ++/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". ++ * The exact number of btf_param is stored in the vlen (of the ++ * info in "struct btf_type"). ++ */ ++struct btf_param { ++ __u32 name_off; ++ __u32 type; ++}; ++ + #endif /* __LINUX_BTF_H__ */ +diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h +index bdfecf9411320..04f763cf53029 100644 +--- a/include/uapi/linux/if_bridge.h ++++ b/include/uapi/linux/if_bridge.h +@@ -292,4 +292,25 @@ struct br_mcast_stats { + __u64 mcast_bytes[BR_MCAST_DIR_SIZE]; + __u64 mcast_packets[BR_MCAST_DIR_SIZE]; + }; ++ ++/* bridge boolean options ++ * BR_BOOLOPT_NO_LL_LEARN - disable learning from link-local packets ++ * ++ * IMPORTANT: if adding a new option do not forget to handle ++ * it in br_boolopt_toggle/get and bridge sysfs ++ */ ++enum br_boolopt_id { ++ BR_BOOLOPT_NO_LL_LEARN, ++ BR_BOOLOPT_MAX ++}; ++ ++/* struct br_boolopt_multi - change multiple bridge boolean options ++ * ++ * @optval: new option values (bit per option) ++ * @optmask: options to change (bit per option) ++ */ ++struct br_boolopt_multi { ++ __u32 optval; ++ __u32 optmask; ++}; + #endif /* _LINUX_IF_BRIDGE_H */ +diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h +index 52e95197e0790..8e6087aee2c29 100644 +--- a/include/uapi/linux/if_link.h ++++ b/include/uapi/linux/if_link.h +@@ -286,6 +286,7 @@ enum { + IFLA_BR_MCAST_IGMP_VERSION, + 
IFLA_BR_MCAST_MLD_VERSION, + IFLA_BR_VLAN_STATS_PER_PORT, ++ IFLA_BR_MULTI_BOOLOPT, + __IFLA_BR_MAX, + }; + +diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h +index be9b744a16458..2f01165514a77 100644 +--- a/include/uapi/linux/if_tun.h ++++ b/include/uapi/linux/if_tun.h +@@ -59,6 +59,7 @@ + #define TUNGETVNETBE _IOR('T', 223, int) + #define TUNSETSTEERINGEBPF _IOR('T', 224, int) + #define TUNSETFILTEREBPF _IOR('T', 225, int) ++#define TUNSETCARRIER _IOW('T', 226, int) + + /* TUNSETIFF ifr flags */ + #define IFF_TUN 0x0001 +diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h +index 998155444e0db..cd144e3099a3c 100644 +--- a/include/uapi/linux/neighbour.h ++++ b/include/uapi/linux/neighbour.h +@@ -28,6 +28,7 @@ enum { + NDA_MASTER, + NDA_LINK_NETNSID, + NDA_SRC_VNI, ++ NDA_PROTOCOL, /* Originator of entry */ + __NDA_MAX + }; + +diff --git a/include/uapi/linux/net_namespace.h b/include/uapi/linux/net_namespace.h +index 6d64d0716800f..fa81f1e5ffa8f 100644 +--- a/include/uapi/linux/net_namespace.h ++++ b/include/uapi/linux/net_namespace.h +@@ -16,6 +16,8 @@ enum { + NETNSA_NSID, + NETNSA_PID, + NETNSA_FD, ++ NETNSA_TARGET_NSID, ++ NETNSA_CURRENT_NSID, + __NETNSA_MAX, + }; + +diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h +index f80135e5feaa8..86dc24a96c90a 100644 +--- a/include/uapi/linux/snmp.h ++++ b/include/uapi/linux/snmp.h +@@ -243,6 +243,7 @@ enum + LINUX_MIB_TCPREQQFULLDROP, /* TCPReqQFullDrop */ + LINUX_MIB_TCPRETRANSFAIL, /* TCPRetransFail */ + LINUX_MIB_TCPRCVCOALESCE, /* TCPRcvCoalesce */ ++ LINUX_MIB_TCPBACKLOGCOALESCE, /* TCPBacklogCoalesce */ + LINUX_MIB_TCPOFOQUEUE, /* TCPOFOQueue */ + LINUX_MIB_TCPOFODROP, /* TCPOFODrop */ + LINUX_MIB_TCPOFOMERGE, /* TCPOFOMerge */ +-- +2.20.1 + diff --git a/SOURCES/0069-ip-xfrm-Respect-family-in-deleteall-and-list-command.patch b/SOURCES/0069-ip-xfrm-Respect-family-in-deleteall-and-list-command.patch new file mode 100644 index 0000000..6871452 --- 
/dev/null +++ b/SOURCES/0069-ip-xfrm-Respect-family-in-deleteall-and-list-command.patch @@ -0,0 +1,104 @@ +From 2f95b860ca09f8dc798204514b06b69cdfa0bd61 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Fri, 14 Jun 2019 11:04:17 +0200 +Subject: [PATCH] ip-xfrm: Respect family in deleteall and list commands + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1656717 +Upstream Status: iproute2.git commit cd21ae40130b4 +Conflicts: on ip xfrm manpage due to missing commit a6af9f2e6195d + ("xfrm: add option to hide keys in state output") + +commit cd21ae40130b4d1ddb3ef500800840e35e7bfad1 +Author: Phil Sutter +Date: Mon May 6 19:09:56 2019 +0200 + + ip-xfrm: Respect family in deleteall and list commands + + Allow to limit 'ip xfrm {state|policy} list' output to a certain address + family and to delete all states/policies by family. + + Although preferred_family was already set in filters, the filter + function ignored it. To enable filtering despite the lack of other + selectors, filter.use has to be set if family is not AF_UNSPEC. 
+ + Signed-off-by: Phil Sutter + Signed-off-by: Stephen Hemminger +--- + ip/xfrm_policy.c | 6 +++++- + ip/xfrm_state.c | 6 +++++- + man/man8/ip-xfrm.8 | 4 ++-- + 3 files changed, 12 insertions(+), 4 deletions(-) + +diff --git a/ip/xfrm_policy.c b/ip/xfrm_policy.c +index d54402691ca0a..5bb3e873d2e8c 100644 +--- a/ip/xfrm_policy.c ++++ b/ip/xfrm_policy.c +@@ -400,6 +400,10 @@ static int xfrm_policy_filter_match(struct xfrm_userpolicy_info *xpinfo, + if (!filter.use) + return 1; + ++ if (filter.xpinfo.sel.family != AF_UNSPEC && ++ filter.xpinfo.sel.family != xpinfo->sel.family) ++ return 0; ++ + if ((xpinfo->dir^filter.xpinfo.dir)&filter.dir_mask) + return 0; + +@@ -773,7 +777,7 @@ static int xfrm_policy_list_or_deleteall(int argc, char **argv, int deleteall) + char *selp = NULL; + struct rtnl_handle rth; + +- if (argc > 0) ++ if (argc > 0 || preferred_family != AF_UNSPEC) + filter.use = 1; + filter.xpinfo.sel.family = preferred_family; + +diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c +index 85d959cc4f44f..2441959e98992 100644 +--- a/ip/xfrm_state.c ++++ b/ip/xfrm_state.c +@@ -876,6 +876,10 @@ static int xfrm_state_filter_match(struct xfrm_usersa_info *xsinfo) + if (!filter.use) + return 1; + ++ if (filter.xsinfo.family != AF_UNSPEC && ++ filter.xsinfo.family != xsinfo->family) ++ return 0; ++ + if (filter.id_src_mask) + if (xfrm_addr_match(&xsinfo->saddr, &filter.xsinfo.saddr, + filter.id_src_mask)) +@@ -1140,7 +1144,7 @@ static int xfrm_state_list_or_deleteall(int argc, char **argv, int deleteall) + char *idp = NULL; + struct rtnl_handle rth; + +- if (argc > 0) ++ if (argc > 0 || preferred_family != AF_UNSPEC) + filter.use = 1; + filter.xsinfo.family = preferred_family; + +diff --git a/man/man8/ip-xfrm.8 b/man/man8/ip-xfrm.8 +index 988cc6aa61d14..d5b9f083147c4 100644 +--- a/man/man8/ip-xfrm.8 ++++ b/man/man8/ip-xfrm.8 +@@ -87,7 +87,7 @@ ip-xfrm \- transform configuration + .IR MASK " ] ]" + + .ti -8 +-.BR "ip xfrm state" " { " deleteall " | " list " } [" ++.BR ip " 
[ " -4 " | " -6 " ] " "xfrm state" " { " deleteall " | " list " } [" + .IR ID " ]" + .RB "[ " mode + .IR MODE " ]" +@@ -244,7 +244,7 @@ ip-xfrm \- transform configuration + .IR PTYPE " ]" + + .ti -8 +-.BR "ip xfrm policy" " { " deleteall " | " list " }" ++.BR ip " [ " -4 " | " -6 " ] " "xfrm policy" " { " deleteall " | " list " }" + .RB "[ " nosock " ]" + .RI "[ " SELECTOR " ]" + .RB "[ " dir +-- +2.20.1 + diff --git a/SOURCES/0070-ss-Review-ssfilter.patch b/SOURCES/0070-ss-Review-ssfilter.patch new file mode 100644 index 0000000..f3801a0 --- /dev/null +++ b/SOURCES/0070-ss-Review-ssfilter.patch @@ -0,0 +1,137 @@ +From da77e40e234599218a3d61434abb5af2815d72a7 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Sun, 2 Jun 2019 16:06:23 +0200 +Subject: [PATCH] ss: Review ssfilter + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1698401 +Upstream Status: iproute2.git commit 38d209ecf2ae9 + +commit 38d209ecf2ae966b9b25de4acb60cdffb0e06ced +Author: Phil Sutter +Date: Tue Aug 14 14:18:06 2018 +0200 + + ss: Review ssfilter + + The original problem was ssfilter rejecting single expressions if + enclosed in braces, such as: + + | sport = 22 or ( dport = 22 ) + + This is fixed by allowing 'expr' to be an 'exprlist' enclosed in braces. + The no longer required recursion in 'exprlist' being an 'exprlist' + enclosed in braces is dropped. + + In addition to that, a few other things are changed: + + * Remove pointless 'null' prefix in 'applet' before 'exprlist'. + * For simple equals matches, '=' operator was required for ports but not + allowed for hosts. Make this consistent by making '=' operator + optional in both cases. 
+ + Reported-by: Samuel Mannehed + Fixes: b2038cc0b2403 ("ssfilter: Eliminate shift/reduce conflicts") + Signed-off-by: Phil Sutter + Signed-off-by: Stephen Hemminger +--- + misc/ssfilter.y | 36 +++++++++++++++++++++--------------- + 1 file changed, 21 insertions(+), 15 deletions(-) + +diff --git a/misc/ssfilter.y b/misc/ssfilter.y +index 88d4229a9b241..0413dddaa7584 100644 +--- a/misc/ssfilter.y ++++ b/misc/ssfilter.y +@@ -42,24 +42,22 @@ static void yyerror(char *s) + %nonassoc '!' + + %% +-applet: null exprlist ++applet: exprlist + { +- *yy_ret = $2; +- $$ = $2; ++ *yy_ret = $1; ++ $$ = $1; + } + | null + ; ++ + null: /* NOTHING */ { $$ = NULL; } + ; ++ + exprlist: expr + | '!' expr + { + $$ = alloc_node(SSF_NOT, $2); + } +- | '(' exprlist ')' +- { +- $$ = $2; +- } + | exprlist '|' expr + { + $$ = alloc_node(SSF_OR, $1); +@@ -77,13 +75,21 @@ exprlist: expr + } + ; + +-expr: DCOND HOSTCOND ++eq: '=' ++ | /* nothing */ ++ ; ++ ++expr: '(' exprlist ')' ++ { ++ $$ = $2; ++ } ++ | DCOND eq HOSTCOND + { +- $$ = alloc_node(SSF_DCOND, $2); ++ $$ = alloc_node(SSF_DCOND, $3); + } +- | SCOND HOSTCOND ++ | SCOND eq HOSTCOND + { +- $$ = alloc_node(SSF_SCOND, $2); ++ $$ = alloc_node(SSF_SCOND, $3); + } + | DPORT GEQ HOSTCOND + { +@@ -101,7 +107,7 @@ expr: DCOND HOSTCOND + { + $$ = alloc_node(SSF_NOT, alloc_node(SSF_D_GE, $3)); + } +- | DPORT '=' HOSTCOND ++ | DPORT eq HOSTCOND + { + $$ = alloc_node(SSF_DCOND, $3); + } +@@ -126,7 +132,7 @@ expr: DCOND HOSTCOND + { + $$ = alloc_node(SSF_NOT, alloc_node(SSF_S_GE, $3)); + } +- | SPORT '=' HOSTCOND ++ | SPORT eq HOSTCOND + { + $$ = alloc_node(SSF_SCOND, $3); + } +@@ -134,7 +140,7 @@ expr: DCOND HOSTCOND + { + $$ = alloc_node(SSF_NOT, alloc_node(SSF_SCOND, $3)); + } +- | DEVNAME '=' DEVCOND ++ | DEVNAME eq DEVCOND + { + $$ = alloc_node(SSF_DEVCOND, $3); + } +@@ -142,7 +148,7 @@ expr: DCOND HOSTCOND + { + $$ = alloc_node(SSF_NOT, alloc_node(SSF_DEVCOND, $3)); + } +- | FWMARK '=' MARKMASK ++ | FWMARK eq MARKMASK + { + $$ = 
alloc_node(SSF_MARKMASK, $3); + } +-- +2.20.1 + diff --git a/SOURCES/0071-ip-reset-netns-after-each-command-in-batch-mode.patch b/SOURCES/0071-ip-reset-netns-after-each-command-in-batch-mode.patch new file mode 100644 index 0000000..978daf4 --- /dev/null +++ b/SOURCES/0071-ip-reset-netns-after-each-command-in-batch-mode.patch @@ -0,0 +1,186 @@ +From e94d7e4519668e840f1c768a569486eebdc3825d Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Tue, 25 Jun 2019 19:03:18 +0200 +Subject: [PATCH] ip: reset netns after each command in batch mode + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1671016 +Upstream Status: iproute2.git commit 80a931d41c058 +Conflicts: on ip/ipnetns.c due to missing commit e3dbcb2a12ab1 + ("netns: add subcommand to attach an existing network namespace") + +commit 80a931d41c0587a4f5bfc1ece7eddac3442b5d9a +Author: Matteo Croce +Date: Fri Jun 7 22:41:22 2019 +0200 + + ip: reset netns after each command in batch mode + + When creating a new netns or executing a program into an existing one, + the unshare() or setns() calls will change the current netns. + In batch mode, this can run commands on the wrong interfaces, as the + ifindex value is meaningful only in the current netns. For example, this + command fails because veth-c doesn't exist in the init netns: + + # ip -b - <<-'EOF' + netns add client + link add name veth-c type veth peer veth-s netns client + addr add 192.168.2.1/24 dev veth-c + EOF + Cannot find device "veth-c" + Command failed -:7 + + But if there are two devices with the same name in the init and new netns, + ip will build a wrong ll_map with indexes belonging to the new netns, + and will execute actions in the init netns using this wrong mapping. 
+ This script will flush all eth0 addresses and bring it down, as it has + the same ifindex of veth0 in the new netns: + + # ip addr + 1: lo: mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000 + link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 + inet 127.0.0.1/8 scope host lo + valid_lft forever preferred_lft forever + 2: eth0: mtu 1500 qdisc mq state UP group default qlen 1000 + link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff + inet 192.168.122.76/24 brd 192.168.122.255 scope global dynamic eth0 + valid_lft 3598sec preferred_lft 3598sec + + # ip -b - <<-'EOF' + netns add client + link add name veth0 type veth peer name veth1 + link add name veth-ns type veth peer name veth0 netns client + link set veth0 down + address flush veth0 + EOF + + # ip addr + 1: lo: mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000 + link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 + inet 127.0.0.1/8 scope host lo + valid_lft forever preferred_lft forever + 2: eth0: mtu 1500 qdisc mq state DOWN group default qlen 1000 + link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff + 3: veth1@veth0: mtu 1500 qdisc noop state DOWN group default qlen 1000 + link/ether c2:db:d0:34:13:4a brd ff:ff:ff:ff:ff:ff + 4: veth0@veth1: mtu 1500 qdisc noop state DOWN group default qlen 1000 + link/ether ca:9d:6b:5f:5f:8f brd ff:ff:ff:ff:ff:ff + 5: veth-ns@if2: mtu 1500 qdisc noop state DOWN group default qlen 1000 + link/ether 32:ef:22:df:51:0a brd ff:ff:ff:ff:ff:ff link-netns client + + The same issue can be triggered by the netns exec subcommand with a + slightly different script: + + # ip netns add client + # ip -b - <<-'EOF' + netns exec client true + link add name veth0 type veth peer name veth1 + link add name veth-ns type veth peer name veth0 netns client + link set veth0 down + address flush veth0 + EOF + + Fix this by adding two netns_{save,restore} functions, which are used + to get a file descriptor for the init netns, and restore it after + each batch command. 
+ netns_save() is called before the unshare() or setns(), + while netns_restore() is called after each command. + + Fixes: 0dc34c7713bb ("iproute2: Add processless network namespace support") + Reviewed-and-tested-by: Andrea Claudi + Signed-off-by: Matteo Croce + Signed-off-by: Stephen Hemminger +--- + include/namespace.h | 2 ++ + ip/ip.c | 1 + + ip/ipnetns.c | 1 + + lib/namespace.c | 31 +++++++++++++++++++++++++++++++ + 4 files changed, 35 insertions(+) + +diff --git a/include/namespace.h b/include/namespace.h +index e47f9b5d49d12..89cdda11782e8 100644 +--- a/include/namespace.h ++++ b/include/namespace.h +@@ -49,6 +49,8 @@ static inline int setns(int fd, int nstype) + } + #endif /* HAVE_SETNS */ + ++void netns_save(void); ++void netns_restore(void); + int netns_switch(char *netns); + int netns_get_fd(const char *netns); + int netns_foreach(int (*func)(char *nsname, void *arg), void *arg); +diff --git a/ip/ip.c b/ip/ip.c +index 2ca55e37a4c62..6e8230b3ee584 100644 +--- a/ip/ip.c ++++ b/ip/ip.c +@@ -158,6 +158,7 @@ static int batch(const char *name) + if (!force) + break; + } ++ netns_restore(); + } + if (line) + free(line); +diff --git a/ip/ipnetns.c b/ip/ipnetns.c +index 368be0cbc0a48..a6e3ea575c363 100644 +--- a/ip/ipnetns.c ++++ b/ip/ipnetns.c +@@ -689,6 +689,7 @@ static int netns_add(int argc, char **argv) + return -1; + } + close(fd); ++ netns_save(); + if (unshare(CLONE_NEWNET) < 0) { + fprintf(stderr, "Failed to create a new network namespace \"%s\": %s\n", + name, strerror(errno)); +diff --git a/lib/namespace.c b/lib/namespace.c +index 06ae0a48c2243..a2aea57ad4109 100644 +--- a/lib/namespace.c ++++ b/lib/namespace.c +@@ -15,6 +15,35 @@ + #include "utils.h" + #include "namespace.h" + ++static int saved_netns = -1; ++ ++/* Obtain a FD for the current namespace, so we can reenter it later */ ++void netns_save(void) ++{ ++ if (saved_netns != -1) ++ return; ++ ++ saved_netns = open("/proc/self/ns/net", O_RDONLY | O_CLOEXEC); ++ if (saved_netns == -1) { ++ 
perror("Cannot open init namespace"); ++ exit(1); ++ } ++} ++ ++void netns_restore(void) ++{ ++ if (saved_netns == -1) ++ return; ++ ++ if (setns(saved_netns, CLONE_NEWNET)) { ++ perror("setns"); ++ exit(1); ++ } ++ ++ close(saved_netns); ++ saved_netns = -1; ++} ++ + static void bind_etc(const char *name) + { + char etc_netns_path[sizeof(NETNS_ETC_DIR) + NAME_MAX]; +@@ -61,6 +90,8 @@ int netns_switch(char *name) + return -1; + } + ++ netns_save(); ++ + if (setns(netns, CLONE_NEWNET) < 0) { + fprintf(stderr, "setting the network namespace \"%s\" failed: %s\n", + name, strerror(errno)); +-- +2.20.1 + diff --git a/SOURCES/0072-tc-introduce-support-for-chain-templates.patch b/SOURCES/0072-tc-introduce-support-for-chain-templates.patch new file mode 100644 index 0000000..1ff31cc --- /dev/null +++ b/SOURCES/0072-tc-introduce-support-for-chain-templates.patch @@ -0,0 +1,379 @@ +From 24ad28e010f1888e431631ef2179f9284b4aed43 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 26 Jun 2019 16:59:54 +0200 +Subject: [PATCH] tc: introduce support for chain templates + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1710291 +Upstream Status: iproute2.git commit afcd06991db26 + +commit afcd06991db267db2d0d4733da34c5c508d30532 +Author: Jiri Pirko +Date: Mon Jul 23 09:24:40 2018 +0200 + + tc: introduce support for chain templates + + Signed-off-by: Jiri Pirko + Signed-off-by: David Ahern +--- + man/man8/tc.8 | 26 ++++++++++ + tc/tc.c | 5 +- + tc/tc_common.h | 1 + + tc/tc_filter.c | 131 ++++++++++++++++++++++++++++++++++-------------- + tc/tc_monitor.c | 5 +- + 5 files changed, 128 insertions(+), 40 deletions(-) + +diff --git a/man/man8/tc.8 b/man/man8/tc.8 +index 840880fbdba63..8a50f57fbfb2d 100644 +--- a/man/man8/tc.8 ++++ b/man/man8/tc.8 +@@ -58,6 +58,22 @@ tc \- show / manipulate traffic control settings + .B flowid + \fIflow-id\fR + ++.B tc ++.RI "[ " OPTIONS " ]" ++.B chain [ add | delete | get ] dev ++\fIDEV\fR ++.B [ parent ++\fIqdisc-id\fR ++.B | root ]\fR 
filtertype ++[ filtertype specific parameters ] ++ ++.B tc ++.RI "[ " OPTIONS " ]" ++.B chain [ add | delete | get ] block ++\fIBLOCK_INDEX\fR filtertype ++[ filtertype specific parameters ] ++ ++ + .B tc + .RI "[ " OPTIONS " ]" + .RI "[ " FORMAT " ]" +@@ -80,6 +96,16 @@ tc \- show / manipulate traffic control settings + .RI "[ " OPTIONS " ]" + .B filter show block + \fIBLOCK_INDEX\fR ++.P ++.B tc ++.RI "[ " OPTIONS " ]" ++.B chain show dev ++\fIDEV\fR ++.P ++.B tc ++.RI "[ " OPTIONS " ]" ++.B chain show block ++\fIBLOCK_INDEX\fR + + .P + .B tc +diff --git a/tc/tc.c b/tc/tc.c +index 88e22ba6bcd0b..1fcb3afa727f0 100644 +--- a/tc/tc.c ++++ b/tc/tc.c +@@ -196,7 +196,8 @@ static void usage(void) + fprintf(stderr, + "Usage: tc [ OPTIONS ] OBJECT { COMMAND | help }\n" + " tc [-force] -batch filename\n" +- "where OBJECT := { qdisc | class | filter | action | monitor | exec }\n" ++ "where OBJECT := { qdisc | class | filter | chain |\n" ++ " action | monitor | exec }\n" + " OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[aw] |\n" + " -o[neline] | -j[son] | -p[retty] | -c[olor]\n" + " -b[atch] [filename] | -n[etns] name |\n" +@@ -211,6 +212,8 @@ static int do_cmd(int argc, char **argv, void *buf, size_t buflen) + return do_class(argc-1, argv+1); + if (matches(*argv, "filter") == 0) + return do_filter(argc-1, argv+1, buf, buflen); ++ if (matches(*argv, "chain") == 0) ++ return do_chain(argc-1, argv+1, buf, buflen); + if (matches(*argv, "actions") == 0) + return do_action(argc-1, argv+1, buf, buflen); + if (matches(*argv, "monitor") == 0) +diff --git a/tc/tc_common.h b/tc/tc_common.h +index 49c24616c2c35..272d1727027d4 100644 +--- a/tc/tc_common.h ++++ b/tc/tc_common.h +@@ -8,6 +8,7 @@ extern struct rtnl_handle rth; + extern int do_qdisc(int argc, char **argv); + extern int do_class(int argc, char **argv); + extern int do_filter(int argc, char **argv, void *buf, size_t buflen); ++extern int do_chain(int argc, char **argv, void *buf, size_t buflen); + extern int 
do_action(int argc, char **argv, void *buf, size_t buflen); + extern int do_tcmonitor(int argc, char **argv); + extern int do_exec(int argc, char **argv); +diff --git a/tc/tc_filter.c b/tc/tc_filter.c +index c5bb0bffe19b2..15044b4bc6ed9 100644 +--- a/tc/tc_filter.c ++++ b/tc/tc_filter.c +@@ -45,6 +45,13 @@ static void usage(void) + "OPTIONS := ... try tc filter add help\n"); + } + ++static void chain_usage(void) ++{ ++ fprintf(stderr, ++ "Usage: tc chain [ add | del | get | show ] [ dev STRING ]\n" ++ " tc chain [ add | del | get | show ] [ block BLOCK_INDEX ] ]\n"); ++} ++ + struct tc_filter_req { + struct nlmsghdr n; + struct tcmsg t; +@@ -85,7 +92,8 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv, + req->n.nlmsg_type = cmd; + req->t.tcm_family = AF_UNSPEC; + +- if (cmd == RTM_NEWTFILTER && flags & NLM_F_CREATE) ++ if ((cmd == RTM_NEWTFILTER || cmd == RTM_NEWCHAIN) && ++ flags & NLM_F_CREATE) + protocol = htons(ETH_P_ALL); + + while (argc > 0) { +@@ -261,7 +269,10 @@ int print_filter(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) + + if (n->nlmsg_type != RTM_NEWTFILTER && + n->nlmsg_type != RTM_GETTFILTER && +- n->nlmsg_type != RTM_DELTFILTER) { ++ n->nlmsg_type != RTM_DELTFILTER && ++ n->nlmsg_type != RTM_NEWCHAIN && ++ n->nlmsg_type != RTM_GETCHAIN && ++ n->nlmsg_type != RTM_DELCHAIN) { + fprintf(stderr, "Not a filter(cmd %d)\n", n->nlmsg_type); + return 0; + } +@@ -273,27 +284,36 @@ int print_filter(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) + + parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len); + +- if (tb[TCA_KIND] == NULL) { ++ if (tb[TCA_KIND] == NULL && (n->nlmsg_type == RTM_NEWTFILTER || ++ n->nlmsg_type == RTM_GETTFILTER || ++ n->nlmsg_type == RTM_DELTFILTER)) { + fprintf(stderr, "print_filter: NULL kind\n"); + return -1; + } + + open_json_object(NULL); + +- if (n->nlmsg_type == RTM_DELTFILTER) ++ if (n->nlmsg_type == RTM_DELTFILTER || n->nlmsg_type == RTM_DELCHAIN) + print_bool(PRINT_ANY, 
"deleted", "deleted ", true); + +- if (n->nlmsg_type == RTM_NEWTFILTER && ++ if ((n->nlmsg_type == RTM_NEWTFILTER || ++ n->nlmsg_type == RTM_NEWCHAIN) && + (n->nlmsg_flags & NLM_F_CREATE) && + !(n->nlmsg_flags & NLM_F_EXCL)) + print_bool(PRINT_ANY, "replaced", "replaced ", true); + +- if (n->nlmsg_type == RTM_NEWTFILTER && ++ if ((n->nlmsg_type == RTM_NEWTFILTER || ++ n->nlmsg_type == RTM_NEWCHAIN) && + (n->nlmsg_flags & NLM_F_CREATE) && + (n->nlmsg_flags & NLM_F_EXCL)) + print_bool(PRINT_ANY, "added", "added ", true); + +- print_string(PRINT_FP, NULL, "filter ", NULL); ++ if (n->nlmsg_type == RTM_NEWTFILTER || ++ n->nlmsg_type == RTM_GETTFILTER || ++ n->nlmsg_type == RTM_DELTFILTER) ++ print_string(PRINT_FP, NULL, "filter ", NULL); ++ else ++ print_string(PRINT_FP, NULL, "chain ", NULL); + if (t->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) { + if (!filter_block_index || + filter_block_index != t->tcm_block_index) +@@ -317,7 +337,9 @@ int print_filter(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) + } + } + +- if (t->tcm_info) { ++ if (t->tcm_info && (n->nlmsg_type == RTM_NEWTFILTER || ++ n->nlmsg_type == RTM_DELTFILTER || ++ n->nlmsg_type == RTM_GETTFILTER)) { + f_proto = TC_H_MIN(t->tcm_info); + __u32 prio = TC_H_MAJ(t->tcm_info)>>16; + +@@ -334,7 +356,8 @@ int print_filter(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) + print_uint(PRINT_ANY, "pref", "pref %u ", prio); + } + } +- print_string(PRINT_ANY, "kind", "%s ", rta_getattr_str(tb[TCA_KIND])); ++ if (tb[TCA_KIND]) ++ print_string(PRINT_ANY, "kind", "%s ", rta_getattr_str(tb[TCA_KIND])); + + if (tb[TCA_CHAIN]) { + __u32 chain_index = rta_getattr_u32(tb[TCA_CHAIN]); +@@ -345,15 +368,17 @@ int print_filter(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) + chain_index); + } + +- q = get_filter_kind(RTA_DATA(tb[TCA_KIND])); +- if (tb[TCA_OPTIONS]) { +- open_json_object("options"); +- if (q) +- q->print_fopt(q, fp, tb[TCA_OPTIONS], t->tcm_handle); +- else +- 
print_string(PRINT_FP, NULL, +- "[cannot parse parameters]", NULL); +- close_json_object(); ++ if (tb[TCA_KIND]) { ++ q = get_filter_kind(RTA_DATA(tb[TCA_KIND])); ++ if (tb[TCA_OPTIONS]) { ++ open_json_object("options"); ++ if (q) ++ q->print_fopt(q, fp, tb[TCA_OPTIONS], t->tcm_handle); ++ else ++ print_string(PRINT_FP, NULL, ++ "[cannot parse parameters]", NULL); ++ close_json_object(); ++ } + } + print_string(PRINT_FP, NULL, "\n", NULL); + +@@ -496,17 +521,19 @@ static int tc_filter_get(int cmd, unsigned int flags, int argc, char **argv) + argc--; argv++; + } + +- if (!protocol_set) { +- fprintf(stderr, "Must specify filter protocol\n"); +- return -1; +- } ++ if (cmd == RTM_GETTFILTER) { ++ if (!protocol_set) { ++ fprintf(stderr, "Must specify filter protocol\n"); ++ return -1; ++ } + +- if (!prio) { +- fprintf(stderr, "Must specify filter priority\n"); +- return -1; +- } ++ if (!prio) { ++ fprintf(stderr, "Must specify filter priority\n"); ++ return -1; ++ } + +- req.t.tcm_info = TC_H_MAKE(prio<<16, protocol); ++ req.t.tcm_info = TC_H_MAKE(prio<<16, protocol); ++ } + + if (chain_index_set) + addattr32(&req.n, sizeof(req), TCA_CHAIN, chain_index); +@@ -516,11 +543,13 @@ static int tc_filter_get(int cmd, unsigned int flags, int argc, char **argv) + return -1; + } + +- if (k[0]) +- addattr_l(&req.n, sizeof(req), TCA_KIND, k, strlen(k)+1); +- else { +- fprintf(stderr, "Must specify filter type\n"); +- return -1; ++ if (cmd == RTM_GETTFILTER) { ++ if (k[0]) ++ addattr_l(&req.n, sizeof(req), TCA_KIND, k, strlen(k)+1); ++ else { ++ fprintf(stderr, "Must specify filter type\n"); ++ return -1; ++ } + } + + if (d[0]) { +@@ -539,10 +568,11 @@ static int tc_filter_get(int cmd, unsigned int flags, int argc, char **argv) + return -1; + } + +- if (q->parse_fopt(q, fhandle, argc, argv, &req.n)) ++ if (cmd == RTM_GETTFILTER && ++ q->parse_fopt(q, fhandle, argc, argv, &req.n)) + return 1; + +- if (!fhandle) { ++ if (!fhandle && cmd == RTM_GETTFILTER) { + fprintf(stderr, "Must 
specify filter \"handle\"\n"); + return -1; + } +@@ -569,7 +599,7 @@ static int tc_filter_get(int cmd, unsigned int flags, int argc, char **argv) + return 0; + } + +-static int tc_filter_list(int argc, char **argv) ++static int tc_filter_list(int cmd, int argc, char **argv) + { + struct { + struct nlmsghdr n; +@@ -577,7 +607,7 @@ static int tc_filter_list(int argc, char **argv) + char buf[MAX_MSG]; + } req = { + .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), +- .n.nlmsg_type = RTM_GETTFILTER, ++ .n.nlmsg_type = cmd, + .t.tcm_parent = TC_H_UNSPEC, + .t.tcm_family = AF_UNSPEC, + }; +@@ -725,7 +755,7 @@ static int tc_filter_list(int argc, char **argv) + int do_filter(int argc, char **argv, void *buf, size_t buflen) + { + if (argc < 1) +- return tc_filter_list(0, NULL); ++ return tc_filter_list(RTM_GETTFILTER, 0, NULL); + if (matches(*argv, "add") == 0) + return tc_filter_modify(RTM_NEWTFILTER, NLM_F_EXCL|NLM_F_CREATE, + argc-1, argv+1, buf, buflen); +@@ -742,7 +772,7 @@ int do_filter(int argc, char **argv, void *buf, size_t buflen) + return tc_filter_get(RTM_GETTFILTER, 0, argc-1, argv+1); + if (matches(*argv, "list") == 0 || matches(*argv, "show") == 0 + || matches(*argv, "lst") == 0) +- return tc_filter_list(argc-1, argv+1); ++ return tc_filter_list(RTM_GETTFILTER, argc-1, argv+1); + if (matches(*argv, "help") == 0) { + usage(); + return 0; +@@ -751,3 +781,28 @@ int do_filter(int argc, char **argv, void *buf, size_t buflen) + *argv); + return -1; + } ++ ++int do_chain(int argc, char **argv, void *buf, size_t buflen) ++{ ++ if (argc < 1) ++ return tc_filter_list(RTM_GETCHAIN, 0, NULL); ++ if (matches(*argv, "add") == 0) { ++ return tc_filter_modify(RTM_NEWCHAIN, NLM_F_EXCL | NLM_F_CREATE, ++ argc - 1, argv + 1, buf, buflen); ++ } else if (matches(*argv, "delete") == 0) { ++ return tc_filter_modify(RTM_DELCHAIN, 0, ++ argc - 1, argv + 1, buf, buflen); ++ } else if (matches(*argv, "get") == 0) { ++ return tc_filter_get(RTM_GETCHAIN, 0, ++ argc - 1, argv + 1); ++ } 
else if (matches(*argv, "list") == 0 || matches(*argv, "show") == 0 || ++ matches(*argv, "lst") == 0) { ++ return tc_filter_list(RTM_GETCHAIN, argc - 1, argv + 1); ++ } else if (matches(*argv, "help") == 0) { ++ chain_usage(); ++ return 0; ++ } ++ fprintf(stderr, "Command \"%s\" is unknown, try \"tc chain help\".\n", ++ *argv); ++ return -1; ++} +diff --git a/tc/tc_monitor.c b/tc/tc_monitor.c +index 077b138d1ec58..1f1ee08fb9cf8 100644 +--- a/tc/tc_monitor.c ++++ b/tc/tc_monitor.c +@@ -43,7 +43,10 @@ static int accept_tcmsg(const struct sockaddr_nl *who, + if (timestamp) + print_timestamp(fp); + +- if (n->nlmsg_type == RTM_NEWTFILTER || n->nlmsg_type == RTM_DELTFILTER) { ++ if (n->nlmsg_type == RTM_NEWTFILTER || ++ n->nlmsg_type == RTM_DELTFILTER || ++ n->nlmsg_type == RTM_NEWCHAIN || ++ n->nlmsg_type == RTM_DELCHAIN) { + print_filter(who, n, arg); + return 0; + } +-- +2.20.1 + diff --git a/SOURCES/0073-m_mirred-don-t-bail-if-the-control-action-is-missing.patch b/SOURCES/0073-m_mirred-don-t-bail-if-the-control-action-is-missing.patch new file mode 100644 index 0000000..a79f9a5 --- /dev/null +++ b/SOURCES/0073-m_mirred-don-t-bail-if-the-control-action-is-missing.patch @@ -0,0 +1,46 @@ +From a298548e9cf58ea7dcaaefd29926bbbc4a1473b4 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Wed, 26 Jun 2019 18:18:04 +0200 +Subject: [PATCH] m_mirred: don't bail if the control action is missing + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1711760 +Upstream Status: iproute2.git commit 6eccf7ecdb010 + +commit 6eccf7ecdb010a90e5271942748ef4338ddb61ae +Author: Paolo Abeni +Date: Mon May 20 11:56:52 2019 +0200 + + m_mirred: don't bail if the control action is missing + + The mirred act admits an optional control action, defaulting + to TC_ACT_PIPE. The parsing code currently emits an error message + if the control action is not provided on the command line, even + if the command itself completes with no error. 
+ + This change shuts down the error message, using the appropriate + parsing helper. + + Fixes: e67aba559581 ("tc: actions: add helpers to parse and print control actions") + Signed-off-by: Paolo Abeni + Signed-off-by: Stephen Hemminger +--- + tc/m_mirred.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tc/m_mirred.c b/tc/m_mirred.c +index c7f7318b8413f..23ba638a234d1 100644 +--- a/tc/m_mirred.c ++++ b/tc/m_mirred.c +@@ -202,7 +202,8 @@ parse_direction(struct action_util *a, int *argc_p, char ***argv_p, + + + if (p.eaction == TCA_EGRESS_MIRROR || p.eaction == TCA_INGRESS_MIRROR) +- parse_action_control(&argc, &argv, &p.action, false); ++ parse_action_control_dflt(&argc, &argv, &p.action, false, ++ TC_ACT_PIPE); + + if (argc) { + if (iok && matches(*argv, "index") == 0) { +-- +2.20.1 + diff --git a/SOURCES/0074-netns-switch-netns-in-the-child-when-executing-comma.patch b/SOURCES/0074-netns-switch-netns-in-the-child-when-executing-comma.patch new file mode 100644 index 0000000..c945957 --- /dev/null +++ b/SOURCES/0074-netns-switch-netns-in-the-child-when-executing-comma.patch @@ -0,0 +1,225 @@ +From 95436dbf882f32ed98f73ec080021daf3841f4a9 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Fri, 28 Jun 2019 14:12:36 +0200 +Subject: [PATCH] netns: switch netns in the child when executing commands + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1719759 +Upstream Status: iproute2.git commit 903818fbf9c73 + +commit 903818fbf9c73dd71793e5829775d2ccc1775af5 +Author: Matteo Croce +Date: Tue Jun 18 16:49:33 2019 +0200 + + netns: switch netns in the child when executing commands + + 'ip netns exec' changes the current netns just before executing a child + process, and restores it after forking. This is needed if we're running + in batch or do_all mode. + Some cleanups must be done both in the parent and in the child: the + parent must restore the previous netns, while the child must reset any + VRF association. 
+ Unfortunately, if do_all is set, the VRF are not reset in the child, and + the spawned processes are started with the wrong VRF context. This can + be triggered with this script: + + # ip -b - <<-'EOF' + link add type vrf table 100 + link set vrf0 up + link add type dummy + link set dummy0 vrf vrf0 up + netns add ns1 + EOF + # ip -all -b - <<-'EOF' + vrf exec vrf0 true + netns exec setsid -f sleep 1h + EOF + # ip vrf pids vrf0 + 314 sleep + # ps 314 + PID TTY STAT TIME COMMAND + 314 ? Ss 0:00 sleep 1h + + Refactor cmd_exec() and pass to it a function pointer which is called in + the child before the final exec. In the netns exec case the function just + resets the VRF and switches netns. + + Doing it in the child is less error prone and safer, because the parent + environment is always kept unaltered. + + After this refactor some utility functions became unused, so remove them. + + Signed-off-by: Matteo Croce + Signed-off-by: Stephen Hemminger +--- + include/utils.h | 6 ++---- + ip/ipnetns.c | 30 ++++++++++++++++-------------- + ip/ipvrf.c | 2 +- + lib/exec.c | 7 ++++++- + lib/utils.c | 27 --------------------------- + 5 files changed, 25 insertions(+), 47 deletions(-) + +diff --git a/include/utils.h b/include/utils.h +index c32b37a1797d8..f00e7742b3c2a 100644 +--- a/include/utils.h ++++ b/include/utils.h +@@ -292,14 +292,12 @@ extern int cmdlineno; + ssize_t getcmdline(char **line, size_t *len, FILE *in); + int makeargs(char *line, char *argv[], int maxargs); + +-int do_each_netns(int (*func)(char *nsname, void *arg), void *arg, +- bool show_label); +- + char *int_to_str(int val, char *buf); + int get_guid(__u64 *guid, const char *arg); + int get_real_family(int rtm_type, int rtm_family); + +-int cmd_exec(const char *cmd, char **argv, bool do_fork); ++int cmd_exec(const char *cmd, char **argv, bool do_fork, ++ int (*setup)(void *), void *arg); + int make_path(const char *path, mode_t mode); + char *find_cgroup2_mount(void); + int get_command_name(const char 
*pid, char *comm, size_t len); +diff --git a/ip/ipnetns.c b/ip/ipnetns.c +index a6e3ea575c363..10bfe2eb69e0b 100644 +--- a/ip/ipnetns.c ++++ b/ip/ipnetns.c +@@ -395,11 +395,24 @@ static int netns_list(int argc, char **argv) + return 0; + } + ++static int do_switch(void *arg) ++{ ++ char *netns = arg; ++ ++ /* we just changed namespaces. clear any vrf association ++ * with prior namespace before exec'ing command ++ */ ++ vrf_reset(); ++ ++ return netns_switch(netns); ++} ++ + static int on_netns_exec(char *nsname, void *arg) + { + char **argv = arg; + +- cmd_exec(argv[1], argv + 1, true); ++ printf("\nnetns: %s\n", nsname); ++ cmd_exec(argv[0], argv, true, do_switch, nsname); + return 0; + } + +@@ -408,8 +421,6 @@ static int netns_exec(int argc, char **argv) + /* Setup the proper environment for apps that are not netns + * aware, and execute a program in that environment. + */ +- const char *cmd; +- + if (argc < 1 && !do_all) { + fprintf(stderr, "No netns name specified\n"); + return -1; +@@ -420,22 +431,13 @@ static int netns_exec(int argc, char **argv) + } + + if (do_all) +- return do_each_netns(on_netns_exec, --argv, 1); +- +- if (netns_switch(argv[0])) +- return -1; +- +- /* we just changed namespaces. clear any vrf association +- * with prior namespace before exec'ing command +- */ +- vrf_reset(); ++ return netns_foreach(on_netns_exec, argv); + + /* ip must return the status of the child, + * but do_cmd() will add a minus to this, + * so let's add another one here to cancel it. 
+ */ +- cmd = argv[1]; +- return -cmd_exec(cmd, argv + 1, !!batch_mode); ++ return -cmd_exec(argv[1], argv + 1, !!batch_mode, do_switch, argv[0]); + } + + static int is_pid(const char *str) +diff --git a/ip/ipvrf.c b/ip/ipvrf.c +index 8a6b7f977b142..c93ff71b39070 100644 +--- a/ip/ipvrf.c ++++ b/ip/ipvrf.c +@@ -455,7 +455,7 @@ static int ipvrf_exec(int argc, char **argv) + if (vrf_switch(argv[0])) + return -1; + +- return -cmd_exec(argv[1], argv + 1, !!batch_mode); ++ return -cmd_exec(argv[1], argv + 1, !!batch_mode, NULL, NULL); + } + + /* reset VRF association of current process to default VRF; +diff --git a/lib/exec.c b/lib/exec.c +index eb36b59dee7f4..9b1c8f4a13960 100644 +--- a/lib/exec.c ++++ b/lib/exec.c +@@ -5,8 +5,10 @@ + #include + + #include "utils.h" ++#include "namespace.h" + +-int cmd_exec(const char *cmd, char **argv, bool do_fork) ++int cmd_exec(const char *cmd, char **argv, bool do_fork, ++ int (*setup)(void *), void *arg) + { + fflush(stdout); + if (do_fork) { +@@ -34,6 +36,9 @@ int cmd_exec(const char *cmd, char **argv, bool do_fork) + } + } + ++ if (setup && setup(arg)) ++ return -1; ++ + if (execvp(cmd, argv) < 0) + fprintf(stderr, "exec of \"%s\" failed: %s\n", + cmd, strerror(errno)); +diff --git a/lib/utils.c b/lib/utils.c +index 7be2d6bec5215..5f229a9a4f584 100644 +--- a/lib/utils.c ++++ b/lib/utils.c +@@ -1455,33 +1455,6 @@ void print_nlmsg_timestamp(FILE *fp, const struct nlmsghdr *n) + fprintf(fp, "Timestamp: %s %lu us\n", tstr, usecs); + } + +-static int on_netns(char *nsname, void *arg) +-{ +- struct netns_func *f = arg; +- +- if (netns_switch(nsname)) +- return -1; +- +- return f->func(nsname, f->arg); +-} +- +-static int on_netns_label(char *nsname, void *arg) +-{ +- printf("\nnetns: %s\n", nsname); +- return on_netns(nsname, arg); +-} +- +-int do_each_netns(int (*func)(char *nsname, void *arg), void *arg, +- bool show_label) +-{ +- struct netns_func nsf = { .func = func, .arg = arg }; +- +- if (show_label) +- return 
netns_foreach(on_netns_label, &nsf); +- +- return netns_foreach(on_netns, &nsf); +-} +- + char *int_to_str(int val, char *buf) + { + sprintf(buf, "%d", val); +-- +2.20.1 + diff --git a/SOURCES/0075-ip-vrf-use-hook-to-change-VRF-in-the-child.patch b/SOURCES/0075-ip-vrf-use-hook-to-change-VRF-in-the-child.patch new file mode 100644 index 0000000..849ffed --- /dev/null +++ b/SOURCES/0075-ip-vrf-use-hook-to-change-VRF-in-the-child.patch @@ -0,0 +1,57 @@ +From 1a9c12f737e86a7905cd123f364af053dc0c7491 Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Fri, 28 Jun 2019 14:12:36 +0200 +Subject: [PATCH] ip vrf: use hook to change VRF in the child + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1719759 +Upstream Status: iproute2.git commit d81d4ba15d74a + +commit d81d4ba15d74a51f23f61a2ddb792689e5db95f0 +Author: Matteo Croce +Date: Tue Jun 18 16:49:34 2019 +0200 + + ip vrf: use hook to change VRF in the child + + On vrf exec, reset the VRF associations in the child process, via the + new hook added to cmd_exec(). In this way, the parent doesn't have to + reset the VRF associations before spawning other processes. 
+ + Signed-off-by: Matteo Croce + Signed-off-by: Stephen Hemminger +--- + ip/ipvrf.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/ip/ipvrf.c b/ip/ipvrf.c +index c93ff71b39070..aba8639501139 100644 +--- a/ip/ipvrf.c ++++ b/ip/ipvrf.c +@@ -441,6 +441,13 @@ out: + return rc; + } + ++static int do_switch(void *arg) ++{ ++ char *vrf = arg; ++ ++ return vrf_switch(vrf); ++} ++ + static int ipvrf_exec(int argc, char **argv) + { + if (argc < 1) { +@@ -452,10 +459,7 @@ static int ipvrf_exec(int argc, char **argv) + return -1; + } + +- if (vrf_switch(argv[0])) +- return -1; +- +- return -cmd_exec(argv[1], argv + 1, !!batch_mode, NULL, NULL); ++ return -cmd_exec(argv[1], argv + 1, !!batch_mode, do_switch, argv[0]); + } + + /* reset VRF association of current process to default VRF; +-- +2.20.1 + diff --git a/SOURCES/0076-netns-make-netns_-save-restore-static.patch b/SOURCES/0076-netns-make-netns_-save-restore-static.patch new file mode 100644 index 0000000..3363708 --- /dev/null +++ b/SOURCES/0076-netns-make-netns_-save-restore-static.patch @@ -0,0 +1,164 @@ +From 56dfe34480259eebd91c9a4dc57a6fe15c07e60a Mon Sep 17 00:00:00 2001 +From: Andrea Claudi +Date: Fri, 28 Jun 2019 14:12:36 +0200 +Subject: [PATCH] netns: make netns_{save,restore} static + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1719759 +Upstream Status: iproute2.git commit b2e2922373a6c +Conflicts: context change due to missing commit e3dbcb2a12ab1 + ("netns: add subcommand to attach an existing network namespace") + +commit b2e2922373a6c65ed08b57926e61f3621d89a70a +Author: Matteo Croce +Date: Tue Jun 18 16:49:35 2019 +0200 + + netns: make netns_{save,restore} static + + The netns_{save,restore} functions are only used in ipnetns.c now, since + the restore is not needed anymore after the netns exec command. + Move them in ipnetns.c, and make them static. 
+ + Signed-off-by: Matteo Croce + Signed-off-by: Stephen Hemminger +--- + include/namespace.h | 2 -- + ip/ip.c | 1 - + ip/ipnetns.c | 31 +++++++++++++++++++++++++++++++ + lib/namespace.c | 31 ------------------------------- + 4 files changed, 31 insertions(+), 34 deletions(-) + +diff --git a/include/namespace.h b/include/namespace.h +index 89cdda11782e8..e47f9b5d49d12 100644 +--- a/include/namespace.h ++++ b/include/namespace.h +@@ -49,8 +49,6 @@ static inline int setns(int fd, int nstype) + } + #endif /* HAVE_SETNS */ + +-void netns_save(void); +-void netns_restore(void); + int netns_switch(char *netns); + int netns_get_fd(const char *netns); + int netns_foreach(int (*func)(char *nsname, void *arg), void *arg); +diff --git a/ip/ip.c b/ip/ip.c +index 6e8230b3ee584..2ca55e37a4c62 100644 +--- a/ip/ip.c ++++ b/ip/ip.c +@@ -158,7 +158,6 @@ static int batch(const char *name) + if (!force) + break; + } +- netns_restore(); + } + if (line) + free(line); +diff --git a/ip/ipnetns.c b/ip/ipnetns.c +index 10bfe2eb69e0b..40848a5cf10ac 100644 +--- a/ip/ipnetns.c ++++ b/ip/ipnetns.c +@@ -42,6 +42,7 @@ static int usage(void) + static struct rtnl_handle rtnsh = { .fd = -1 }; + + static int have_rtnl_getnsid = -1; ++static int saved_netns = -1; + + static int ipnetns_accept_msg(const struct sockaddr_nl *who, + struct rtnl_ctrl_data *ctrl, +@@ -634,6 +635,33 @@ static int create_netns_dir(void) + return 0; + } + ++/* Obtain a FD for the current namespace, so we can reenter it later */ ++static void netns_save(void) ++{ ++ if (saved_netns != -1) ++ return; ++ ++ saved_netns = open("/proc/self/ns/net", O_RDONLY | O_CLOEXEC); ++ if (saved_netns == -1) { ++ perror("Cannot open init namespace"); ++ exit(1); ++ } ++} ++ ++static void netns_restore(void) ++{ ++ if (saved_netns == -1) ++ return; ++ ++ if (setns(saved_netns, CLONE_NEWNET)) { ++ perror("setns"); ++ exit(1); ++ } ++ ++ close(saved_netns); ++ saved_netns = -1; ++} ++ + static int netns_add(int argc, char **argv) + { + /* This 
function creates a new network namespace and +@@ -704,8 +732,11 @@ static int netns_add(int argc, char **argv) + netns_path, strerror(errno)); + goto out_delete; + } ++ netns_restore(); ++ + return 0; + out_delete: ++ netns_restore(); + netns_delete(argc, argv); + return -1; + } +diff --git a/lib/namespace.c b/lib/namespace.c +index a2aea57ad4109..06ae0a48c2243 100644 +--- a/lib/namespace.c ++++ b/lib/namespace.c +@@ -15,35 +15,6 @@ + #include "utils.h" + #include "namespace.h" + +-static int saved_netns = -1; +- +-/* Obtain a FD for the current namespace, so we can reenter it later */ +-void netns_save(void) +-{ +- if (saved_netns != -1) +- return; +- +- saved_netns = open("/proc/self/ns/net", O_RDONLY | O_CLOEXEC); +- if (saved_netns == -1) { +- perror("Cannot open init namespace"); +- exit(1); +- } +-} +- +-void netns_restore(void) +-{ +- if (saved_netns == -1) +- return; +- +- if (setns(saved_netns, CLONE_NEWNET)) { +- perror("setns"); +- exit(1); +- } +- +- close(saved_netns); +- saved_netns = -1; +-} +- + static void bind_etc(const char *name) + { + char etc_netns_path[sizeof(NETNS_ETC_DIR) + NAME_MAX]; +@@ -90,8 +61,6 @@ int netns_switch(char *name) + return -1; + } + +- netns_save(); +- + if (setns(netns, CLONE_NEWNET) < 0) { + fprintf(stderr, "setting the network namespace \"%s\" failed: %s\n", + name, strerror(errno)); +-- +2.20.1 + diff --git a/SPECS/iproute.spec b/SPECS/iproute.spec index 0813dbe..d6e0e31 100644 --- a/SPECS/iproute.spec +++ b/SPECS/iproute.spec @@ -1,7 +1,7 @@ %global cbq_version v0.7.3 %define rpmversion 4.18.0 -%define specrelease 11%{?dist} +%define specrelease 15%{?dist} %define pkg_release %{specrelease}%{?buildid} Summary: Advanced IP routing and network device configuration tools @@ -58,6 +58,38 @@ Patch40: 0041-iplink-fix-incorrect-any-address-handling-for-ip-tun.pa Patch41: 0042-l2tp-Fix-printing-of-cookie-and-peer_cookie-values.patch Patch42: 0043-tc-f_flower-add-geneve-option-match-support-to-flowe.patch Patch43: 
0044-tc-m_tunnel_key-Add-tunnel-option-support-to-act_tun.patch +Patch44: 0045-ip-rule-Add-ipproto-and-port-range-to-filter-list.patch +Patch45: 0046-tc-flower-Add-support-for-QinQ.patch +Patch46: 0047-uapi-update-ib_verbs.patch +Patch47: 0048-rdma-Fix-representation-of-PortInfo-CapabilityMask.patch +Patch48: 0049-devlink-Add-param-command-support.patch +Patch49: 0050-libnetlink-Convert-GETADDR-dumps-to-use-rtnl_addrdum.patch +Patch50: 0051-rdma-Update-kernel-include-file-to-support-IB-device.patch +Patch51: 0052-rdma-Introduce-command-execution-helper-with-require.patch +Patch52: 0053-rdma-Add-an-option-to-rename-IB-device-interface.patch +Patch53: 0054-rdma-Document-IB-device-renaming-option.patch +Patch54: 0055-iplink-add-support-for-reporting-multiple-XDP-progra.patch +Patch55: 0056-bpf-move-bpf_elf_map-fixup-notification-under-verbos.patch +Patch56: 0057-bpf-remove-strict-dependency-on-af_alg.patch +Patch57: 0058-bpf-implement-bpf-to-bpf-calls-support.patch +Patch58: 0059-bpf-implement-btf-handling-and-map-annotation.patch +Patch59: 0060-bpf-check-map-symbol-type-properly-with-newer-llvm-c.patch +Patch60: 0061-Use-libbsd-for-strlcpy-if-available.patch +Patch61: 0062-Include-bsd-string.h-only-in-include-utils.h.patch +Patch62: 0063-bpf-initialise-map-symbol-before-retrieving-and-comp.patch +Patch63: 0064-lib-bpf-fix-build-warning-if-no-elf.patch +Patch64: 0065-bpf-add-btf-func-and-func_proto-kind-support.patch +Patch65: 0066-uapi-update-headers-to-4.20-rc1.patch +Patch66: 0067-uapi-update-bpf-header.patch +Patch67: 0068-Update-kernel-headers.patch +Patch68: 0069-ip-xfrm-Respect-family-in-deleteall-and-list-command.patch +Patch69: 0070-ss-Review-ssfilter.patch +Patch70: 0071-ip-reset-netns-after-each-command-in-batch-mode.patch +Patch71: 0072-tc-introduce-support-for-chain-templates.patch +Patch72: 0073-m_mirred-don-t-bail-if-the-control-action-is-missing.patch +Patch73: 0074-netns-switch-netns-in-the-child-when-executing-comma.patch +Patch74: 
0075-ip-vrf-use-hook-to-change-VRF-in-the-child.patch +Patch75: 0076-netns-make-netns_-save-restore-static.patch License: GPLv2+ and Public Domain BuildRequires: bison BuildRequires: elfutils-libelf-devel @@ -202,6 +234,46 @@ cat %{SOURCE3} >>%{buildroot}%{_sysconfdir}/iproute2/rt_dsfield %{_includedir}/iproute2/bpf_elf.h %changelog +* Thu Jul 04 2019 Andrea Claudi [4.18.0-15.el8] +- netns: make netns_{save,restore} static (Andrea Claudi) [1719759] +- ip vrf: use hook to change VRF in the child (Andrea Claudi) [1719759] +- netns: switch netns in the child when executing commands (Andrea Claudi) [1719759] +- m_mirred: don't bail if the control action is missing (Andrea Claudi) [1711760] +- tc: introduce support for chain templates (Andrea Claudi) [1710291] +- ip: reset netns after each command in batch mode (Andrea Claudi) [1671016] + +* Thu Jun 20 2019 Andrea Claudi [4.18.0-14.el8] +- ss: Review ssfilter (Andrea Claudi) [1698401] + +* Fri Jun 14 2019 Andrea Claudi [4.18.0-13.el8] +- ip-xfrm: Respect family in deleteall and list commands (Andrea Claudi) [1656717] +- Update kernel headers (Andrea Claudi) [1716361] +- uapi: update bpf header (Andrea Claudi) [1716361] +- uapi: update headers to 4.20-rc1 (Andrea Claudi) [1716361] +- bpf: add btf func and func_proto kind support (Andrea Claudi) [1716361] +- lib/bpf: fix build warning if no elf (Andrea Claudi) [1716361] +- bpf: initialise map symbol before retrieving and comparing its type (Andrea Claudi) [1716361] +- Include bsd/string.h only in include/utils.h (Andrea Claudi) [1716361] +- Use libbsd for strlcpy if available (Andrea Claudi) [1716361] +- bpf: check map symbol type properly with newer llvm compiler (Andrea Claudi) [1716361] +- bpf: implement btf handling and map annotation (Andrea Claudi) [1716361] +- bpf: implement bpf to bpf calls support (Andrea Claudi) [1716361] +- bpf: remove strict dependency on af_alg (Andrea Claudi) [1716361] +- bpf: move bpf_elf_map fixup notification under verbose (Andrea Claudi) 
[1716361] +- iplink: add support for reporting multiple XDP programs (Andrea Claudi) [1716361] +- rdma: Document IB device renaming option (Andrea Claudi) [1663228] +- rdma: Add an option to rename IB device interface (Andrea Claudi) [1663228] +- rdma: Introduce command execution helper with required device name (Andrea Claudi) [1663228] +- rdma: Update kernel include file to support IB device renaming (Andrea Claudi) [1663228] +- libnetlink: Convert GETADDR dumps to use rtnl_addrdump_req (Andrea Claudi) [1716772] + +* Wed May 29 2019 Andrea Claudi [4.18.0-12.el8] +- devlink: Add param command support (Andrea Claudi) [1663199] +- rdma: Fix representation of PortInfo CapabilityMask (Andrea Claudi) [1664694] +- uapi: update ib_verbs (Andrea Claudi) [1664694] +- tc: flower: Add support for QinQ (Andrea Claudi) [1615928] +- ip rule: Add ipproto and port range to filter list (Andrea Claudi) [1678111] + * Thu Jan 31 2019 Phil Sutter [4.18.0-11.el8] - tc: m_tunnel_key: Add tunnel option support to act_tunnel_key (Phil Sutter) [1654761] - tc: f_flower: add geneve option match support to flower (Phil Sutter) [1654761]