naccyde / rpms / iproute

Forked from rpms/iproute 5 months ago
Clone

Blame SOURCES/0219-Add-support-to-configure-SR-IOV-VF-minimum-and-maxim.patch

049c96
From ed60b38f294c5f950d125384af8e430388929c51 Mon Sep 17 00:00:00 2001
049c96
From: Jakub Sitnicki <jkbs@redhat.com>
049c96
Date: Wed, 27 Jul 2016 15:56:14 +0200
049c96
Subject: [PATCH] Add support to configure SR-IOV VF minimum and maximum Tx
049c96
 rate through ip tool
049c96
049c96
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1340914
049c96
Upstream Status: iproute2.git commit f89a2a0
049c96
Conflicts:
049c96
* context in iplink_parse() because we already have
049c96
  ccdcbf35f120 ("iplink: add support of IFLA_LINK_NETNSID attribute")
049c96
* we already have a partial backport of ca611d6408c9 ("man: ip-link.8:
049c96
  Fix and improve synopsis"), so fast-forward the synopsis to its
049c96
  current form
049c96
049c96
commit f89a2a05ffa94ac5bec9f50751f761215356092b
049c96
Author: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
049c96
Date:   Thu May 22 09:59:37 2014 -0400
049c96
049c96
    Add support to configure SR-IOV VF minimum and maximum Tx rate through ip tool
049c96
049c96
    o "min_tx_rate" option has been added for minimum Tx rate. Hence, for
049c96
      consistent naming, "max_tx_rate" option has been introduced for maximum
049c96
      Tx rate.
049c96
049c96
    o Change in v2: "rate" can be used along with "max_tx_rate".
049c96
      When both are specified, "max_tx_rate" should override.
049c96
049c96
    o Change in v3:
049c96
      * IFLA_VF_RATE: When IFLA_VF_RATE is used, and user has given only one of
049c96
        min_tx_rate or max_tx_rate, reading of previous rate limits is done in
049c96
        userspace instead of in kernel space before ndo_set_vf_rate.
049c96
049c96
      * IFLA_VF_TX_RATE: When IFLA_VF_TX_RATE is used, min_tx_rate is always read
049c96
        in kernel space. This takes care of below scenarios:
049c96
        (1) when old tool sends "rate" but kernel is new (expects min and max)
049c96
        (2) when new tool sends only "rate" but kernel is old (expects only "rate")
049c96
049c96
    o Change in v4 as suggested by Stephen Hemminger:
049c96
      * As per iproute policy, input and output formats should match. Changing display
049c96
        of max_tx_rate and min_tx_rate options accordingly.
049c96
    	./ip/ip link show p3p1
049c96
    	8: p3p1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT qlen 1000
049c96
            link/ether 00:0e:1e:16:ce:40 brd ff:ff:ff:ff:ff:ff
049c96
            vf 0 MAC 2a:18:8f:4d:3d:d4, tx rate 700 (Mbps), max_tx_rate 700Mbps, min_tx_rate 200Mbps
049c96
            vf 1 MAC 72:dc:ba:f9:df:fd
049c96
049c96
    Signed-off-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
049c96
---
049c96
 ip/ip_common.h        |  1 +
049c96
 ip/ipaddress.c        | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
049c96
 ip/iplink.c           | 63 ++++++++++++++++++++++++++++++++++++++++++++++++---
049c96
 man/man8/ip-link.8.in | 26 +++++++++++++++++++--
049c96
 4 files changed, 148 insertions(+), 5 deletions(-)
049c96
049c96
diff --git a/ip/ip_common.h b/ip/ip_common.h
049c96
index c3e7fda..286f61f 100644
049c96
--- a/ip/ip_common.h
049c96
+++ b/ip/ip_common.h
049c96
@@ -17,6 +17,7 @@ extern int iproute_monitor(int argc, char **argv);
049c96
 extern void iplink_usage(void) __attribute__((noreturn));
049c96
 extern void iproute_reset_filter(void);
049c96
 extern void ipmroute_reset_filter(void);
049c96
+void ipaddr_get_vf_rate(int, int *, int *, int);
049c96
 extern void ipaddr_reset_filter(int);
049c96
 extern void ipneigh_reset_filter(void);
049c96
 extern void ipntable_reset_filter(void);
049c96
diff --git a/ip/ipaddress.c b/ip/ipaddress.c
049c96
index 05e0da9..11ff34d 100644
049c96
--- a/ip/ipaddress.c
049c96
+++ b/ip/ipaddress.c
049c96
@@ -290,6 +290,7 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo)
049c96
 {
049c96
 	struct ifla_vf_mac *vf_mac;
049c96
 	struct ifla_vf_vlan *vf_vlan;
049c96
+	struct ifla_vf_rate *vf_rate;
049c96
 	struct ifla_vf_tx_rate *vf_tx_rate;
049c96
 	struct ifla_vf_spoofchk *vf_spoofchk;
049c96
 	struct ifla_vf_link_state *vf_linkstate;
049c96
@@ -307,6 +308,7 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo)
049c96
 	vf_mac = RTA_DATA(vf[IFLA_VF_MAC]);
049c96
 	vf_vlan = RTA_DATA(vf[IFLA_VF_VLAN]);
049c96
 	vf_tx_rate = RTA_DATA(vf[IFLA_VF_TX_RATE]);
049c96
+	vf_rate = RTA_DATA(vf[IFLA_VF_RATE]);
049c96
 
049c96
 	/* Check if the spoof checking vf info type is supported by
049c96
 	 * this kernel.
049c96
@@ -342,6 +344,10 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo)
049c96
 		fprintf(fp, ", qos %d", vf_vlan->qos);
049c96
 	if (vf_tx_rate->rate)
049c96
 		fprintf(fp, ", tx rate %d (Mbps)", vf_tx_rate->rate);
049c96
+	if (vf_rate->max_tx_rate)
049c96
+		fprintf(fp, ", max_tx_rate %dMbps", vf_rate->max_tx_rate);
049c96
+	if (vf_rate->min_tx_rate)
049c96
+		fprintf(fp, ", min_tx_rate %dMbps", vf_rate->min_tx_rate);
049c96
 	if (vf_spoofchk && vf_spoofchk->setting != -1) {
049c96
 		if (vf_spoofchk->setting)
049c96
 			fprintf(fp, ", spoof checking on");
049c96
@@ -1450,6 +1456,63 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action)
049c96
 	return 0;
049c96
 }
049c96
 
049c96
+static void
049c96
+ipaddr_loop_each_vf(struct rtattr *tb[], int vfnum, int *min, int *max)
049c96
+{
049c96
+	struct rtattr *vflist = tb[IFLA_VFINFO_LIST];
049c96
+	struct rtattr *i, *vf[IFLA_VF_MAX+1];
049c96
+	struct ifla_vf_rate *vf_rate;
049c96
+	int rem;
049c96
+
049c96
+	rem = RTA_PAYLOAD(vflist);
049c96
+
049c96
+	for (i = RTA_DATA(vflist); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
049c96
+		parse_rtattr_nested(vf, IFLA_VF_MAX, i);
049c96
+		vf_rate = RTA_DATA(vf[IFLA_VF_RATE]);
049c96
+		if (vf_rate->vf == vfnum) {
049c96
+			*min = vf_rate->min_tx_rate;
049c96
+			*max = vf_rate->max_tx_rate;
049c96
+			return;
049c96
+		}
049c96
+	}
049c96
+	fprintf(stderr, "Cannot find VF %d\n", vfnum);
049c96
+	exit(1);
049c96
+}
049c96
+
049c96
+void ipaddr_get_vf_rate(int vfnum, int *min, int *max, int idx)
049c96
+{
049c96
+	struct nlmsg_chain linfo = { NULL, NULL};
049c96
+	struct rtattr *tb[IFLA_MAX+1];
049c96
+	struct ifinfomsg *ifi;
049c96
+	struct nlmsg_list *l;
049c96
+	struct nlmsghdr *n;
049c96
+	int len;
049c96
+
049c96
+	if (rtnl_wilddump_request(&rth, AF_UNSPEC, RTM_GETLINK) < 0) {
049c96
+		perror("Cannot send dump request");
049c96
+		exit(1);
049c96
+	}
049c96
+	if (rtnl_dump_filter(&rth, store_nlmsg, &linfo) < 0) {
049c96
+		fprintf(stderr, "Dump terminated\n");
049c96
+		exit(1);
049c96
+	}
049c96
+	for (l = linfo.head; l; l = l->next) {
049c96
+		n = &l->h;
049c96
+		ifi = NLMSG_DATA(n);
049c96
+
049c96
+		len = n->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
049c96
+		if (len < 0 || idx && idx != ifi->ifi_index)
049c96
+			continue;
049c96
+
049c96
+		parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len);
049c96
+
049c96
+		if ((tb[IFLA_VFINFO_LIST] && tb[IFLA_NUM_VF])) {
049c96
+			ipaddr_loop_each_vf(tb, vfnum, min, max);
049c96
+			return;
049c96
+		}
049c96
+	}
049c96
+}
049c96
+
049c96
 int ipaddr_list_link(int argc, char **argv)
049c96
 {
049c96
 	preferred_family = AF_PACKET;
049c96
diff --git a/ip/iplink.c b/ip/iplink.c
049c96
index 0098443..bce5ba0 100644
049c96
--- a/ip/iplink.c
049c96
+++ b/ip/iplink.c
049c96
@@ -233,16 +233,40 @@ struct iplink_req {
049c96
 };
049c96
 
049c96
 static int iplink_parse_vf(int vf, int *argcp, char ***argvp,
049c96
-			   struct iplink_req *req)
049c96
+			   struct iplink_req *req, int dev_index)
049c96
 {
049c96
+	char new_rate_api = 0, count = 0, override_legacy_rate = 0;
049c96
+	struct ifla_vf_rate tivt;
049c96
 	int len, argc = *argcp;
049c96
 	char **argv = *argvp;
049c96
 	struct rtattr *vfinfo;
049c96
 
049c96
+	tivt.min_tx_rate = -1;
049c96
+	tivt.max_tx_rate = -1;
049c96
+
049c96
 	vfinfo = addattr_nest(&req->n, sizeof(*req), IFLA_VF_INFO);
049c96
 
049c96
 	while (NEXT_ARG_OK()) {
049c96
 		NEXT_ARG();
049c96
+		count++;
049c96
+		if (!matches(*argv, "max_tx_rate")) {
049c96
+			/* new API in use */
049c96
+			new_rate_api = 1;
049c96
+			/* override legacy rate */
049c96
+			override_legacy_rate = 1;
049c96
+		} else if (!matches(*argv, "min_tx_rate")) {
049c96
+			/* new API in use */
049c96
+			new_rate_api = 1;
049c96
+		}
049c96
+	}
049c96
+
049c96
+	while (count--) {
049c96
+		/* rewind arg */
049c96
+		PREV_ARG();
049c96
+	}
049c96
+
049c96
+	while (NEXT_ARG_OK()) {
049c96
+		NEXT_ARG();
049c96
 		if (matches(*argv, "mac") == 0) {
049c96
 			struct ifla_vf_mac ivm;
049c96
 			NEXT_ARG();
049c96
@@ -279,7 +303,25 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp,
049c96
 				invarg("Invalid \"rate\" value\n", *argv);
049c96
 			}
049c96
 			ivt.vf = vf;
049c96
-			addattr_l(&req->n, sizeof(*req), IFLA_VF_TX_RATE, &ivt, sizeof(ivt));
049c96
+			if (!new_rate_api)
049c96
+				addattr_l(&req->n, sizeof(*req),
049c96
+					  IFLA_VF_TX_RATE, &ivt, sizeof(ivt));
049c96
+			else if (!override_legacy_rate)
049c96
+				tivt.max_tx_rate = ivt.rate;
049c96
+
049c96
+		} else if (matches(*argv, "max_tx_rate") == 0) {
049c96
+			NEXT_ARG();
049c96
+			if (get_unsigned(&tivt.max_tx_rate, *argv, 0))
049c96
+				invarg("Invalid \"max tx rate\" value\n",
049c96
+				       *argv);
049c96
+			tivt.vf = vf;
049c96
+
049c96
+		} else if (matches(*argv, "min_tx_rate") == 0) {
049c96
+			NEXT_ARG();
049c96
+			if (get_unsigned(&tivt.min_tx_rate, *argv, 0))
049c96
+				invarg("Invalid \"min tx rate\" value\n",
049c96
+				       *argv);
049c96
+			tivt.vf = vf;
049c96
 
049c96
 		} else if (matches(*argv, "spoofchk") == 0) {
049c96
 			struct ifla_vf_spoofchk ivs;
049c96
@@ -337,6 +379,19 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp,
049c96
 		}
049c96
 	}
049c96
 
049c96
+	if (new_rate_api) {
049c96
+		int tmin, tmax;
049c96
+		if (tivt.min_tx_rate == -1 || tivt.max_tx_rate == -1) {
049c96
+			ipaddr_get_vf_rate(tivt.vf, &tmin, &tmax, dev_index);
049c96
+			if (tivt.min_tx_rate == -1)
049c96
+				tivt.min_tx_rate = tmin;
049c96
+			if (tivt.max_tx_rate == -1)
049c96
+				tivt.max_tx_rate = tmax;
049c96
+		}
049c96
+		addattr_l(&req->n, sizeof(*req), IFLA_VF_RATE, &tivt,
049c96
+			  sizeof(tivt));
049c96
+	}
049c96
+
049c96
 	if (argc == *argcp)
049c96
 		incomplete_command();
049c96
 
049c96
@@ -358,6 +413,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
049c96
 	int vf = -1;
049c96
 	int numtxqueues = -1;
049c96
 	int numrxqueues = -1;
049c96
+	int dev_index;
049c96
 	int link_netnsid = -1;
049c96
 
049c96
 	*group = -1;
049c96
@@ -468,7 +524,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
049c96
 			}
049c96
 			vflist = addattr_nest(&req->n, sizeof(*req),
049c96
 					      IFLA_VFINFO_LIST);
049c96
-			len = iplink_parse_vf(vf, &argc, &argv, req);
049c96
+			len = iplink_parse_vf(vf, &argc, &argv, req, dev_index);
049c96
 			if (len < 0)
049c96
 				return -1;
049c96
 			addattr_nest_end(&req->n, vflist);
049c96
@@ -570,6 +626,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
049c96
 			if (*dev)
049c96
 				duparg2("dev", *argv);
049c96
 			*dev = *argv;
049c96
+			dev_index = ll_name_to_index(*dev);
049c96
 		}
049c96
 		argc--; argv++;
049c96
 	}
049c96
diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in
049c96
index 33172e5..741922f 100644
049c96
--- a/man/man8/ip-link.8.in
049c96
+++ b/man/man8/ip-link.8.in
049c96
@@ -106,6 +106,12 @@ ip-link \- network device configuration
049c96
 .RB "[ " rate
049c96
 .IR TXRATE " ]"
049c96
 .br
049c96
+.RB "[ " max_tx_rate
049c96
+.IR TXRATE " ]"
049c96
+.br
049c96
+.RB "[ " min_tx_rate
049c96
+.IR TXRATE " ]"
049c96
+.br
049c96
 .RB "[ " spoofchk " { " on " | " off " } ]"
049c96
 .br
049c96
 .RB "[ " query_rss " { " on " | " off " } ]"
049c96
@@ -1021,8 +1027,24 @@ as 0 disables VLAN tagging and filtering for the VF.
049c96
 
049c96
 .sp
049c96
 .BI rate " TXRATE"
049c96
-- change the allowed transmit bandwidth, in Mbps, for the specified VF.
049c96
-Setting this parameter to 0 disables rate limiting. The
049c96
+-- change the allowed transmit bandwidth, in Mbps, for the specified VF.
049c96
+Setting this parameter to 0 disables rate limiting.
049c96
+.B vf
049c96
+parameter must be specified.
049c96
+Please use the new API
049c96
+.B "max_tx_rate"
049c96
+option instead.
049c96
+
049c96
+.sp
049c96
+.BI max_tx_rate " TXRATE"
049c96
+- change the allowed maximum transmit bandwidth, in Mbps, for the specified VF.
049c96
+.B vf
049c96
+parameter must be specified.
049c96
+
049c96
+.sp
049c96
+.BI min_tx_rate " TXRATE"
049c96
+- change the allowed minimum transmit bandwidth, in Mbps, for the specified VF.
049c96
+Minimum TXRATE should always be <= Maximum TXRATE.
049c96
 .B vf
049c96
 parameter must be specified.
049c96
 
049c96
-- 
049c96
1.8.3.1
049c96