|
|
049c96 |
From ed60b38f294c5f950d125384af8e430388929c51 Mon Sep 17 00:00:00 2001
|
|
|
049c96 |
From: Jakub Sitnicki <jkbs@redhat.com>
|
|
|
049c96 |
Date: Wed, 27 Jul 2016 15:56:14 +0200
|
|
|
049c96 |
Subject: [PATCH] Add support to configure SR-IOV VF minimum and maximum Tx
|
|
|
049c96 |
rate through ip tool
|
|
|
049c96 |
|
|
|
049c96 |
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1340914
|
|
|
049c96 |
Upstream Status: iproute2.git commit f89a2a0
|
|
|
049c96 |
Conflicts:
|
|
|
049c96 |
* context in iplink_parse() because we already have
|
|
|
049c96 |
ccdcbf35f120 ("iplink: add support of IFLA_LINK_NETNSID attribute")
|
|
|
049c96 |
* we already have a partial backport of ca611d6408c9 ("man: ip-link.8:
|
|
|
049c96 |
Fix and improve synopsis"), so fast-forward the synopsis to its
|
|
|
049c96 |
current form
|
|
|
049c96 |
|
|
|
049c96 |
commit f89a2a05ffa94ac5bec9f50751f761215356092b
|
|
|
049c96 |
Author: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
|
|
|
049c96 |
Date: Thu May 22 09:59:37 2014 -0400
|
|
|
049c96 |
|
|
|
049c96 |
Add support to configure SR-IOV VF minimum and maximum Tx rate through ip tool
|
|
|
049c96 |
|
|
|
049c96 |
o "min_tx_rate" option has been added for minimum Tx rate. Hence, for
|
|
|
049c96 |
consistent naming, "max_tx_rate" option has been introduced for maximum
|
|
|
049c96 |
Tx rate.
|
|
|
049c96 |
|
|
|
049c96 |
o Change in v2: "rate" can be used along with "max_tx_rate".
|
|
|
049c96 |
When both are specified, "max_tx_rate" should override.
|
|
|
049c96 |
|
|
|
049c96 |
o Change in v3:
|
|
|
049c96 |
* IFLA_VF_RATE: When IFLA_VF_RATE is used, and user has given only one of
|
|
|
049c96 |
min_tx_rate or max_tx_rate, reading of previous rate limits is done in
|
|
|
049c96 |
userspace instead of in kernel space before ndo_set_vf_rate.
|
|
|
049c96 |
|
|
|
049c96 |
* IFLA_VF_TX_RATE: When IFLA_VF_TX_RATE is used, min_tx_rate is always read
|
|
|
049c96 |
in kernel space. This takes care of below scenarios:
|
|
|
049c96 |
(1) when old tool sends "rate" but kernel is new (expects min and max)
|
|
|
049c96 |
(2) when new tool sends only "rate" but kernel is old (expects only "rate")
|
|
|
049c96 |
|
|
|
049c96 |
o Change in v4 as suggested by Stephen Hemminger:
|
|
|
049c96 |
* As per iproute policy, input and output formats should match. Changing display
|
|
|
049c96 |
of max_tx_rate and min_tx_rate options accordingly.
|
|
|
049c96 |
./ip/ip link show p3p1
|
|
|
049c96 |
8: p3p1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT qlen 1000
|
|
|
049c96 |
link/ether 00:0e:1e:16:ce:40 brd ff:ff:ff:ff:ff:ff
|
|
|
049c96 |
vf 0 MAC 2a:18:8f:4d:3d:d4, tx rate 700 (Mbps), max_tx_rate 700Mbps, min_tx_rate 200Mbps
|
|
|
049c96 |
vf 1 MAC 72:dc:ba:f9:df:fd
|
|
|
049c96 |
|
|
|
049c96 |
Signed-off-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
|
|
|
049c96 |
---
|
|
|
049c96 |
ip/ip_common.h | 1 +
|
|
|
049c96 |
ip/ipaddress.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
|
049c96 |
ip/iplink.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++---
|
|
|
049c96 |
man/man8/ip-link.8.in | 26 +++++++++++++++++++--
|
|
|
049c96 |
4 files changed, 148 insertions(+), 5 deletions(-)
|
|
|
049c96 |
|
|
|
049c96 |
diff --git a/ip/ip_common.h b/ip/ip_common.h
|
|
|
049c96 |
index c3e7fda..286f61f 100644
|
|
|
049c96 |
--- a/ip/ip_common.h
|
|
|
049c96 |
+++ b/ip/ip_common.h
|
|
|
049c96 |
@@ -17,6 +17,7 @@ extern int iproute_monitor(int argc, char **argv);
|
|
|
049c96 |
extern void iplink_usage(void) __attribute__((noreturn));
|
|
|
049c96 |
extern void iproute_reset_filter(void);
|
|
|
049c96 |
extern void ipmroute_reset_filter(void);
|
|
|
049c96 |
+void ipaddr_get_vf_rate(int, int *, int *, int);
|
|
|
049c96 |
extern void ipaddr_reset_filter(int);
|
|
|
049c96 |
extern void ipneigh_reset_filter(void);
|
|
|
049c96 |
extern void ipntable_reset_filter(void);
|
|
|
049c96 |
diff --git a/ip/ipaddress.c b/ip/ipaddress.c
|
|
|
049c96 |
index 05e0da9..11ff34d 100644
|
|
|
049c96 |
--- a/ip/ipaddress.c
|
|
|
049c96 |
+++ b/ip/ipaddress.c
|
|
|
049c96 |
@@ -290,6 +290,7 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo)
|
|
|
049c96 |
{
|
|
|
049c96 |
struct ifla_vf_mac *vf_mac;
|
|
|
049c96 |
struct ifla_vf_vlan *vf_vlan;
|
|
|
049c96 |
+ struct ifla_vf_rate *vf_rate;
|
|
|
049c96 |
struct ifla_vf_tx_rate *vf_tx_rate;
|
|
|
049c96 |
struct ifla_vf_spoofchk *vf_spoofchk;
|
|
|
049c96 |
struct ifla_vf_link_state *vf_linkstate;
|
|
|
049c96 |
@@ -307,6 +308,7 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo)
|
|
|
049c96 |
vf_mac = RTA_DATA(vf[IFLA_VF_MAC]);
|
|
|
049c96 |
vf_vlan = RTA_DATA(vf[IFLA_VF_VLAN]);
|
|
|
049c96 |
vf_tx_rate = RTA_DATA(vf[IFLA_VF_TX_RATE]);
|
|
|
049c96 |
+ vf_rate = RTA_DATA(vf[IFLA_VF_RATE]);
|
|
|
049c96 |
|
|
|
049c96 |
/* Check if the spoof checking vf info type is supported by
|
|
|
049c96 |
* this kernel.
|
|
|
049c96 |
@@ -342,6 +344,10 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo)
|
|
|
049c96 |
fprintf(fp, ", qos %d", vf_vlan->qos);
|
|
|
049c96 |
if (vf_tx_rate->rate)
|
|
|
049c96 |
fprintf(fp, ", tx rate %d (Mbps)", vf_tx_rate->rate);
|
|
|
049c96 |
+ if (vf_rate->max_tx_rate)
|
|
|
049c96 |
+ fprintf(fp, ", max_tx_rate %dMbps", vf_rate->max_tx_rate);
|
|
|
049c96 |
+ if (vf_rate->min_tx_rate)
|
|
|
049c96 |
+ fprintf(fp, ", min_tx_rate %dMbps", vf_rate->min_tx_rate);
|
|
|
049c96 |
if (vf_spoofchk && vf_spoofchk->setting != -1) {
|
|
|
049c96 |
if (vf_spoofchk->setting)
|
|
|
049c96 |
fprintf(fp, ", spoof checking on");
|
|
|
049c96 |
@@ -1450,6 +1456,63 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action)
|
|
|
049c96 |
return 0;
|
|
|
049c96 |
}
|
|
|
049c96 |
|
|
|
049c96 |
+static void
|
|
|
049c96 |
+ipaddr_loop_each_vf(struct rtattr *tb[], int vfnum, int *min, int *max)
|
|
|
049c96 |
+{
|
|
|
049c96 |
+ struct rtattr *vflist = tb[IFLA_VFINFO_LIST];
|
|
|
049c96 |
+ struct rtattr *i, *vf[IFLA_VF_MAX+1];
|
|
|
049c96 |
+ struct ifla_vf_rate *vf_rate;
|
|
|
049c96 |
+ int rem;
|
|
|
049c96 |
+
|
|
|
049c96 |
+ rem = RTA_PAYLOAD(vflist);
|
|
|
049c96 |
+
|
|
|
049c96 |
+ for (i = RTA_DATA(vflist); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
|
|
|
049c96 |
+ parse_rtattr_nested(vf, IFLA_VF_MAX, i);
|
|
|
049c96 |
+ vf_rate = RTA_DATA(vf[IFLA_VF_RATE]);
|
|
|
049c96 |
+ if (vf_rate->vf == vfnum) {
|
|
|
049c96 |
+ *min = vf_rate->min_tx_rate;
|
|
|
049c96 |
+ *max = vf_rate->max_tx_rate;
|
|
|
049c96 |
+ return;
|
|
|
049c96 |
+ }
|
|
|
049c96 |
+ }
|
|
|
049c96 |
+ fprintf(stderr, "Cannot find VF %d\n", vfnum);
|
|
|
049c96 |
+ exit(1);
|
|
|
049c96 |
+}
|
|
|
049c96 |
+
|
|
|
049c96 |
+void ipaddr_get_vf_rate(int vfnum, int *min, int *max, int idx)
|
|
|
049c96 |
+{
|
|
|
049c96 |
+ struct nlmsg_chain linfo = { NULL, NULL};
|
|
|
049c96 |
+ struct rtattr *tb[IFLA_MAX+1];
|
|
|
049c96 |
+ struct ifinfomsg *ifi;
|
|
|
049c96 |
+ struct nlmsg_list *l;
|
|
|
049c96 |
+ struct nlmsghdr *n;
|
|
|
049c96 |
+ int len;
|
|
|
049c96 |
+
|
|
|
049c96 |
+ if (rtnl_wilddump_request(&rth, AF_UNSPEC, RTM_GETLINK) < 0) {
|
|
|
049c96 |
+ perror("Cannot send dump request");
|
|
|
049c96 |
+ exit(1);
|
|
|
049c96 |
+ }
|
|
|
049c96 |
+ if (rtnl_dump_filter(&rth, store_nlmsg, &linfo) < 0) {
|
|
|
049c96 |
+ fprintf(stderr, "Dump terminated\n");
|
|
|
049c96 |
+ exit(1);
|
|
|
049c96 |
+ }
|
|
|
049c96 |
+ for (l = linfo.head; l; l = l->next) {
|
|
|
049c96 |
+ n = &l->h;
|
|
|
049c96 |
+ ifi = NLMSG_DATA(n);
|
|
|
049c96 |
+
|
|
|
049c96 |
+ len = n->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
|
|
|
049c96 |
+ if (len < 0 || idx && idx != ifi->ifi_index)
|
|
|
049c96 |
+ continue;
|
|
|
049c96 |
+
|
|
|
049c96 |
+ parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len);
|
|
|
049c96 |
+
|
|
|
049c96 |
+ if ((tb[IFLA_VFINFO_LIST] && tb[IFLA_NUM_VF])) {
|
|
|
049c96 |
+ ipaddr_loop_each_vf(tb, vfnum, min, max);
|
|
|
049c96 |
+ return;
|
|
|
049c96 |
+ }
|
|
|
049c96 |
+ }
|
|
|
049c96 |
+}
|
|
|
049c96 |
+
|
|
|
049c96 |
int ipaddr_list_link(int argc, char **argv)
|
|
|
049c96 |
{
|
|
|
049c96 |
preferred_family = AF_PACKET;
|
|
|
049c96 |
diff --git a/ip/iplink.c b/ip/iplink.c
|
|
|
049c96 |
index 0098443..bce5ba0 100644
|
|
|
049c96 |
--- a/ip/iplink.c
|
|
|
049c96 |
+++ b/ip/iplink.c
|
|
|
049c96 |
@@ -233,16 +233,40 @@ struct iplink_req {
|
|
|
049c96 |
};
|
|
|
049c96 |
|
|
|
049c96 |
static int iplink_parse_vf(int vf, int *argcp, char ***argvp,
|
|
|
049c96 |
- struct iplink_req *req)
|
|
|
049c96 |
+ struct iplink_req *req, int dev_index)
|
|
|
049c96 |
{
|
|
|
049c96 |
+ char new_rate_api = 0, count = 0, override_legacy_rate = 0;
|
|
|
049c96 |
+ struct ifla_vf_rate tivt;
|
|
|
049c96 |
int len, argc = *argcp;
|
|
|
049c96 |
char **argv = *argvp;
|
|
|
049c96 |
struct rtattr *vfinfo;
|
|
|
049c96 |
|
|
|
049c96 |
+ tivt.min_tx_rate = -1;
|
|
|
049c96 |
+ tivt.max_tx_rate = -1;
|
|
|
049c96 |
+
|
|
|
049c96 |
vfinfo = addattr_nest(&req->n, sizeof(*req), IFLA_VF_INFO);
|
|
|
049c96 |
|
|
|
049c96 |
while (NEXT_ARG_OK()) {
|
|
|
049c96 |
NEXT_ARG();
|
|
|
049c96 |
+ count++;
|
|
|
049c96 |
+ if (!matches(*argv, "max_tx_rate")) {
|
|
|
049c96 |
+ /* new API in use */
|
|
|
049c96 |
+ new_rate_api = 1;
|
|
|
049c96 |
+ /* override legacy rate */
|
|
|
049c96 |
+ override_legacy_rate = 1;
|
|
|
049c96 |
+ } else if (!matches(*argv, "min_tx_rate")) {
|
|
|
049c96 |
+ /* new API in use */
|
|
|
049c96 |
+ new_rate_api = 1;
|
|
|
049c96 |
+ }
|
|
|
049c96 |
+ }
|
|
|
049c96 |
+
|
|
|
049c96 |
+ while (count--) {
|
|
|
049c96 |
+ /* rewind arg */
|
|
|
049c96 |
+ PREV_ARG();
|
|
|
049c96 |
+ }
|
|
|
049c96 |
+
|
|
|
049c96 |
+ while (NEXT_ARG_OK()) {
|
|
|
049c96 |
+ NEXT_ARG();
|
|
|
049c96 |
if (matches(*argv, "mac") == 0) {
|
|
|
049c96 |
struct ifla_vf_mac ivm;
|
|
|
049c96 |
NEXT_ARG();
|
|
|
049c96 |
@@ -279,7 +303,25 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp,
|
|
|
049c96 |
invarg("Invalid \"rate\" value\n", *argv);
|
|
|
049c96 |
}
|
|
|
049c96 |
ivt.vf = vf;
|
|
|
049c96 |
- addattr_l(&req->n, sizeof(*req), IFLA_VF_TX_RATE, &ivt, sizeof(ivt));
|
|
|
049c96 |
+ if (!new_rate_api)
|
|
|
049c96 |
+ addattr_l(&req->n, sizeof(*req),
|
|
|
049c96 |
+ IFLA_VF_TX_RATE, &ivt, sizeof(ivt));
|
|
|
049c96 |
+ else if (!override_legacy_rate)
|
|
|
049c96 |
+ tivt.max_tx_rate = ivt.rate;
|
|
|
049c96 |
+
|
|
|
049c96 |
+ } else if (matches(*argv, "max_tx_rate") == 0) {
|
|
|
049c96 |
+ NEXT_ARG();
|
|
|
049c96 |
+ if (get_unsigned(&tivt.max_tx_rate, *argv, 0))
|
|
|
049c96 |
+ invarg("Invalid \"max tx rate\" value\n",
|
|
|
049c96 |
+ *argv);
|
|
|
049c96 |
+ tivt.vf = vf;
|
|
|
049c96 |
+
|
|
|
049c96 |
+ } else if (matches(*argv, "min_tx_rate") == 0) {
|
|
|
049c96 |
+ NEXT_ARG();
|
|
|
049c96 |
+ if (get_unsigned(&tivt.min_tx_rate, *argv, 0))
|
|
|
049c96 |
+ invarg("Invalid \"min tx rate\" value\n",
|
|
|
049c96 |
+ *argv);
|
|
|
049c96 |
+ tivt.vf = vf;
|
|
|
049c96 |
|
|
|
049c96 |
} else if (matches(*argv, "spoofchk") == 0) {
|
|
|
049c96 |
struct ifla_vf_spoofchk ivs;
|
|
|
049c96 |
@@ -337,6 +379,19 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp,
|
|
|
049c96 |
}
|
|
|
049c96 |
}
|
|
|
049c96 |
|
|
|
049c96 |
+ if (new_rate_api) {
|
|
|
049c96 |
+ int tmin, tmax;
|
|
|
049c96 |
+ if (tivt.min_tx_rate == -1 || tivt.max_tx_rate == -1) {
|
|
|
049c96 |
+ ipaddr_get_vf_rate(tivt.vf, &tmin, &tmax, dev_index);
|
|
|
049c96 |
+ if (tivt.min_tx_rate == -1)
|
|
|
049c96 |
+ tivt.min_tx_rate = tmin;
|
|
|
049c96 |
+ if (tivt.max_tx_rate == -1)
|
|
|
049c96 |
+ tivt.max_tx_rate = tmax;
|
|
|
049c96 |
+ }
|
|
|
049c96 |
+ addattr_l(&req->n, sizeof(*req), IFLA_VF_RATE, &tivt,
|
|
|
049c96 |
+ sizeof(tivt));
|
|
|
049c96 |
+ }
|
|
|
049c96 |
+
|
|
|
049c96 |
if (argc == *argcp)
|
|
|
049c96 |
incomplete_command();
|
|
|
049c96 |
|
|
|
049c96 |
@@ -358,6 +413,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
|
|
|
049c96 |
int vf = -1;
|
|
|
049c96 |
int numtxqueues = -1;
|
|
|
049c96 |
int numrxqueues = -1;
|
|
|
049c96 |
+ int dev_index;
|
|
|
049c96 |
int link_netnsid = -1;
|
|
|
049c96 |
|
|
|
049c96 |
*group = -1;
|
|
|
049c96 |
@@ -468,7 +524,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
|
|
|
049c96 |
}
|
|
|
049c96 |
vflist = addattr_nest(&req->n, sizeof(*req),
|
|
|
049c96 |
IFLA_VFINFO_LIST);
|
|
|
049c96 |
- len = iplink_parse_vf(vf, &argc, &argv, req);
|
|
|
049c96 |
+ len = iplink_parse_vf(vf, &argc, &argv, req, dev_index);
|
|
|
049c96 |
if (len < 0)
|
|
|
049c96 |
return -1;
|
|
|
049c96 |
addattr_nest_end(&req->n, vflist);
|
|
|
049c96 |
@@ -570,6 +626,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
|
|
|
049c96 |
if (*dev)
|
|
|
049c96 |
duparg2("dev", *argv);
|
|
|
049c96 |
*dev = *argv;
|
|
|
049c96 |
+ dev_index = ll_name_to_index(*dev);
|
|
|
049c96 |
}
|
|
|
049c96 |
argc--; argv++;
|
|
|
049c96 |
}
|
|
|
049c96 |
diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in
|
|
|
049c96 |
index 33172e5..741922f 100644
|
|
|
049c96 |
--- a/man/man8/ip-link.8.in
|
|
|
049c96 |
+++ b/man/man8/ip-link.8.in
|
|
|
049c96 |
@@ -106,6 +106,12 @@ ip-link \- network device configuration
|
|
|
049c96 |
.RB "[ " rate
|
|
|
049c96 |
.IR TXRATE " ]"
|
|
|
049c96 |
.br
|
|
|
049c96 |
+.RB "[ " max_tx_rate
|
|
|
049c96 |
+.IR TXRATE " ]"
|
|
|
049c96 |
+.br
|
|
|
049c96 |
+.RB "[ " min_tx_rate
|
|
|
049c96 |
+.IR TXRATE " ]"
|
|
|
049c96 |
+.br
|
|
|
049c96 |
.RB "[ " spoofchk " { " on " | " off " } ]"
|
|
|
049c96 |
.br
|
|
|
049c96 |
.RB "[ " query_rss " { " on " | " off " } ]"
|
|
|
049c96 |
@@ -1021,8 +1027,24 @@ as 0 disables VLAN tagging and filtering for the VF.
|
|
|
049c96 |
|
|
|
049c96 |
.sp
|
|
|
049c96 |
.BI rate " TXRATE"
|
|
|
049c96 |
-- change the allowed transmit bandwidth, in Mbps, for the specified VF.
|
|
|
049c96 |
-Setting this parameter to 0 disables rate limiting. The
|
|
|
049c96 |
+-- change the allowed transmit bandwidth, in Mbps, for the specified VF.
|
|
|
049c96 |
+Setting this parameter to 0 disables rate limiting.
|
|
|
049c96 |
+.B vf
|
|
|
049c96 |
+parameter must be specified.
|
|
|
049c96 |
+Please use new API
|
|
|
049c96 |
+.B "max_tx_rate"
|
|
|
049c96 |
+option instead.
|
|
|
049c96 |
+
|
|
|
049c96 |
+.sp
|
|
|
049c96 |
+.BI max_tx_rate " TXRATE"
|
|
|
049c96 |
+- change the allowed maximum transmit bandwidth, in Mbps, for the specified VF.
|
|
|
049c96 |
+.B vf
|
|
|
049c96 |
+parameter must be specified.
|
|
|
049c96 |
+
|
|
|
049c96 |
+.sp
|
|
|
049c96 |
+.BI min_tx_rate " TXRATE"
|
|
|
049c96 |
+- change the allowed minimum transmit bandwidth, in Mbps, for the specified VF.
|
|
|
049c96 |
+Minimum TXRATE should be always <= Maximum TXRATE.
|
|
|
049c96 |
.B vf
|
|
|
049c96 |
parameter must be specified.
|
|
|
049c96 |
|
|
|
049c96 |
--
|
|
|
049c96 |
1.8.3.1
|
|
|
049c96 |
|