Blame SOURCES/iproute2-3.10.0-pkt_sched-fq-Fair-Queue-packet-scheduler.patch

a4b897
From 837c38c3365b63ba486d0b0eb8a963621d8f0ac2 Mon Sep 17 00:00:00 2001
a4b897
From: Eric Dumazet <edumazet@google.com>
a4b897
Date: Thu, 29 Aug 2013 19:30:36 -0700
a4b897
Subject: [PATCH 1/2] pkt_sched: fq: Fair Queue packet scheduler
a4b897
a4b897
Support for FQ packet scheduler
a4b897
a4b897
$ tc qd add dev eth0 root fq help
a4b897
Usage: ... fq [ limit PACKETS ] [ flow_limit PACKETS ]
a4b897
              [ quantum BYTES ] [ initial_quantum BYTES ]
a4b897
              [ maxrate RATE  ] [ buckets NUMBER ]
a4b897
              [ [no]pacing ]
a4b897
a4b897
$ tc -s -d qd
a4b897
qdisc fq 8002: dev eth0 root refcnt 32 limit 10000p flow_limit 100p
a4b897
buckets 256 quantum 3028 initial_quantum 15140
a4b897
 Sent 216532416 bytes 148395 pkt (dropped 0, overlimits 0 requeues 14)
a4b897
 backlog 0b 0p requeues 14
a4b897
  511 flows (511 inactive, 0 throttled)
a4b897
  110 gc, 0 highprio, 0 retrans, 1143 throttled, 0 flows_plimit
a4b897
a4b897
limit	: max number of packets on whole Qdisc (default 10000)
a4b897
a4b897
flow_limit : max number of packets per flow (default 100)
a4b897
a4b897
quantum : the max deficit per RR round (default is 2 MTU)
a4b897
a4b897
initial_quantum : initial credit for new flows (default is 10 MTU)
a4b897
a4b897
maxrate : max per flow rate (default : unlimited)
a4b897
a4b897
buckets : number of RB trees (default : 1024) in hash table.
a4b897
               (consumes 8 bytes per bucket)
a4b897
a4b897
[no]pacing : disable/enable pacing (default is enabled)
a4b897
a4b897
Usage :
a4b897
a4b897
tc qdisc add dev $ETH root fq
a4b897
a4b897
tc qdisc del dev $ETH root 2>/dev/null
a4b897
tc qdisc add dev $ETH root handle 1: mq
a4b897
for i in `seq 1 4`
a4b897
do
a4b897
  tc qdisc add dev $ETH parent 1:$i est 1sec 4sec fq
a4b897
done
a4b897
a4b897
Signed-off-by: Eric Dumazet <edumazet@google.com>
a4b897
---
a4b897
 tc/Makefile |   1 +
a4b897
 tc/q_fq.c   | 279 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
a4b897
 2 files changed, 280 insertions(+)
a4b897
 create mode 100644 tc/q_fq.c
a4b897
a4b897
diff --git a/tc/Makefile b/tc/Makefile
a4b897
index af6a277..79116f3 100644
a4b897
--- a/tc/Makefile
a4b897
+++ b/tc/Makefile
a4b897
@@ -56,6 +56,7 @@ TCMODULES += em_meta.o
a4b897
 TCMODULES += q_mqprio.o
a4b897
 TCMODULES += q_codel.o
a4b897
 TCMODULES += q_fq_codel.o
a4b897
+TCMODULES += q_fq.o
a4b897
 
a4b897
 ifeq ($(TC_CONFIG_IPSET), y)
a4b897
   ifeq ($(TC_CONFIG_XT), y)
a4b897
diff --git a/tc/q_fq.c b/tc/q_fq.c
a4b897
new file mode 100644
a4b897
index 0000000..c1f658e
a4b897
--- /dev/null
a4b897
+++ b/tc/q_fq.c
a4b897
@@ -0,0 +1,279 @@
a4b897
+/*
a4b897
+ * Fair Queue
a4b897
+ *
a4b897
+ *  Copyright (C) 2013 Eric Dumazet <edumazet@google.com>
a4b897
+ *
a4b897
+ * Redistribution and use in source and binary forms, with or without
a4b897
+ * modification, are permitted provided that the following conditions
a4b897
+ * are met:
a4b897
+ * 1. Redistributions of source code must retain the above copyright
a4b897
+ *    notice, this list of conditions, and the following disclaimer,
a4b897
+ *    without modification.
a4b897
+ * 2. Redistributions in binary form must reproduce the above copyright
a4b897
+ *    notice, this list of conditions and the following disclaimer in the
a4b897
+ *    documentation and/or other materials provided with the distribution.
a4b897
+ * 3. The names of the authors may not be used to endorse or promote products
a4b897
+ *    derived from this software without specific prior written permission.
a4b897
+ *
a4b897
+ * Alternatively, provided that this notice is retained in full, this
a4b897
+ * software may be distributed under the terms of the GNU General
a4b897
+ * Public License ("GPL") version 2, in which case the provisions of the
a4b897
+ * GPL apply INSTEAD OF those given above.
a4b897
+ *
a4b897
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
a4b897
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
a4b897
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
a4b897
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
a4b897
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
a4b897
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
a4b897
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
a4b897
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
a4b897
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
a4b897
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
a4b897
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
a4b897
+ * DAMAGE.
a4b897
+ *
a4b897
+ */
a4b897
+
a4b897
+#include <stdio.h>
a4b897
+#include <stdlib.h>
a4b897
+#include <unistd.h>
a4b897
+#include <syslog.h>
a4b897
+#include <fcntl.h>
a4b897
+#include <sys/socket.h>
a4b897
+#include <netinet/in.h>
a4b897
+#include <arpa/inet.h>
a4b897
+#include <string.h>
a4b897
+
a4b897
+#include "utils.h"
a4b897
+#include "tc_util.h"
a4b897
+
a4b897
+static void explain(void)
a4b897
+{ /* Print the fq usage summary to stderr; defrate, also accepted by fq_parse_opt(), is not listed. */
a4b897
+	fprintf(stderr, "Usage: ... fq [ limit PACKETS ] [ flow_limit PACKETS ]\n");
a4b897
+	fprintf(stderr, "              [ quantum BYTES ] [ initial_quantum BYTES ]\n");
a4b897
+	fprintf(stderr, "              [ maxrate RATE  ] [ buckets NUMBER ]\n");
a4b897
+	fprintf(stderr, "              [ [no]pacing ]\n");
a4b897
+}
a4b897
+
a4b897
+static unsigned int ilog2(unsigned int val)
a4b897
+{ /* For val >= 1, return the smallest res with 2^res >= val (log2 rounded up). */
a4b897
+	unsigned int res = 0;
a4b897
+
a4b897
+	val--; /* val == 0 wraps to the largest unsigned value, so the result is then the bit width */
a4b897
+	while (val) { /* count the significant bits of val - 1 */
a4b897
+		res++;
a4b897
+		val >>= 1;
a4b897
+	}
a4b897
+	return res;
a4b897
+}
a4b897
+
a4b897
+static int fq_parse_opt(struct qdisc_util *qu, int argc, char **argv,
a4b897
+			struct nlmsghdr *n)
a4b897
+{ /* Parse the fq keywords in argv and append the given ones to n behind a TCA_OPTIONS attribute; returns 0, or -1 on help, an unknown keyword or an illegal value. */
a4b897
+	unsigned int plimit = ~0U; /* ~0U = option not given (same for the other ~0U initialisers) */
a4b897
+	unsigned int flow_plimit = ~0U;
a4b897
+	unsigned int quantum = ~0U;
a4b897
+	unsigned int initial_quantum = ~0U;
a4b897
+	unsigned int buckets = 0; /* 0 = option not given */
a4b897
+	unsigned int maxrate = ~0U;
a4b897
+	unsigned int defrate = ~0U;
a4b897
+	int pacing = -1; /* -1 = neither pacing nor nopacing given */
a4b897
+	struct rtattr *tail;
a4b897
+
a4b897
+	while (argc > 0) {
a4b897
+		if (strcmp(*argv, "limit") == 0) {
a4b897
+			NEXT_ARG();
a4b897
+			if (get_unsigned(&plimit, *argv, 0)) {
a4b897
+				fprintf(stderr, "Illegal \"limit\"\n");
a4b897
+				return -1;
a4b897
+			}
a4b897
+		} else if (strcmp(*argv, "flow_limit") == 0) {
a4b897
+			NEXT_ARG();
a4b897
+			if (get_unsigned(&flow_plimit, *argv, 0)) {
a4b897
+				fprintf(stderr, "Illegal \"flow_limit\"\n");
a4b897
+				return -1;
a4b897
+			}
a4b897
+		} else if (strcmp(*argv, "buckets") == 0) {
a4b897
+			NEXT_ARG();
a4b897
+			if (get_unsigned(&buckets, *argv, 0)) {
a4b897
+				fprintf(stderr, "Illegal \"buckets\"\n");
a4b897
+				return -1;
a4b897
+			}
a4b897
+		} else if (strcmp(*argv, "maxrate") == 0) {
a4b897
+			NEXT_ARG();
a4b897
+			if (get_rate(&maxrate, *argv)) {
a4b897
+				fprintf(stderr, "Illegal \"maxrate\"\n");
a4b897
+				return -1;
a4b897
+			}
a4b897
+		} else if (strcmp(*argv, "defrate") == 0) { /* accepted here but absent from explain() */
a4b897
+			NEXT_ARG();
a4b897
+			if (get_rate(&defrate, *argv)) {
a4b897
+				fprintf(stderr, "Illegal \"defrate\"\n");
a4b897
+				return -1;
a4b897
+			}
a4b897
+		} else if (strcmp(*argv, "quantum") == 0) {
a4b897
+			NEXT_ARG();
a4b897
+			if (get_unsigned(&quantum, *argv, 0)) {
a4b897
+				fprintf(stderr, "Illegal \"quantum\"\n");
a4b897
+				return -1;
a4b897
+			}
a4b897
+		} else if (strcmp(*argv, "initial_quantum") == 0) {
a4b897
+			NEXT_ARG();
a4b897
+			if (get_unsigned(&initial_quantum, *argv, 0)) {
a4b897
+				fprintf(stderr, "Illegal \"initial_quantum\"\n");
a4b897
+				return -1;
a4b897
+			}
a4b897
+		} else if (strcmp(*argv, "pacing") == 0) {
a4b897
+			pacing = 1;
a4b897
+		} else if (strcmp(*argv, "nopacing") == 0) {
a4b897
+			pacing = 0;
a4b897
+		} else if (strcmp(*argv, "help") == 0) {
a4b897
+			explain();
a4b897
+			return -1;
a4b897
+		} else {
a4b897
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
a4b897
+			explain();
a4b897
+			return -1;
a4b897
+		}
a4b897
+		argc--; argv++; /* advance to the next keyword */
a4b897
+	}
a4b897
+
a4b897
+	tail = NLMSG_TAIL(n); /* remembered so its rta_len can be fixed up at the end */
a4b897
+	addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); /* empty payload; NOTE(review): this and the addattr_l results below are not checked */
a4b897
+	if (buckets) {
a4b897
+		unsigned int log = ilog2(buckets); /* the count is sent as its log2, rounded up */
a4b897
+
a4b897
+		addattr_l(n, 1024, TCA_FQ_BUCKETS_LOG,
a4b897
+			  &log, sizeof(log));
a4b897
+	}
a4b897
+	if (plimit != ~0U) /* a user value equal to ~0U is also treated as not given */
a4b897
+		addattr_l(n, 1024, TCA_FQ_PLIMIT,
a4b897
+			  &plimit, sizeof(plimit));
a4b897
+	if (flow_plimit != ~0U)
a4b897
+		addattr_l(n, 1024, TCA_FQ_FLOW_PLIMIT,
a4b897
+			  &flow_plimit, sizeof(flow_plimit));
a4b897
+	if (quantum != ~0U)
a4b897
+		addattr_l(n, 1024, TCA_FQ_QUANTUM, &quantum, sizeof(quantum));
a4b897
+	if (initial_quantum != ~0U)
a4b897
+		addattr_l(n, 1024, TCA_FQ_INITIAL_QUANTUM,
a4b897
+			  &initial_quantum, sizeof(initial_quantum));
a4b897
+	if (pacing != -1)
a4b897
+		addattr_l(n, 1024, TCA_FQ_RATE_ENABLE,
a4b897
+			  &pacing, sizeof(pacing));
a4b897
+	if (maxrate != ~0U)
a4b897
+		addattr_l(n, 1024, TCA_FQ_FLOW_MAX_RATE,
a4b897
+			  &maxrate, sizeof(maxrate));
a4b897
+	if (defrate != ~0U)
a4b897
+		addattr_l(n, 1024, TCA_FQ_FLOW_DEFAULT_RATE,
a4b897
+			  &defrate, sizeof(defrate));
a4b897
+	tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail; /* length now covers everything appended since tail was taken */
a4b897
+	return 0;
a4b897
+}
a4b897
+
a4b897
+static int fq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
a4b897
+{ /* Print to f each FQ attribute that is present in opt with a large enough payload; always returns 0. */
a4b897
+	struct rtattr *tb[TCA_FQ_MAX + 1];
a4b897
+	unsigned int plimit, flow_plimit;
a4b897
+	unsigned int buckets_log;
a4b897
+	int pacing;
a4b897
+	unsigned int rate, quantum;
a4b897
+	SPRINT_BUF(b1);
a4b897
+
a4b897
+	if (opt == NULL)
a4b897
+		return 0;
a4b897
+
a4b897
+	parse_rtattr_nested(tb, TCA_FQ_MAX, opt); /* tb[] is indexed by TCA_FQ_* below */
a4b897
+
a4b897
+	if (tb[TCA_FQ_PLIMIT] &&
a4b897
+	    RTA_PAYLOAD(tb[TCA_FQ_PLIMIT]) >= sizeof(__u32)) {
a4b897
+		plimit = rta_getattr_u32(tb[TCA_FQ_PLIMIT]);
a4b897
+		fprintf(f, "limit %up ", plimit);
a4b897
+	}
a4b897
+	if (tb[TCA_FQ_FLOW_PLIMIT] &&
a4b897
+	    RTA_PAYLOAD(tb[TCA_FQ_FLOW_PLIMIT]) >= sizeof(__u32)) {
a4b897
+		flow_plimit = rta_getattr_u32(tb[TCA_FQ_FLOW_PLIMIT]);
a4b897
+		fprintf(f, "flow_limit %up ", flow_plimit);
a4b897
+	}
a4b897
+	if (tb[TCA_FQ_BUCKETS_LOG] &&
a4b897
+	    RTA_PAYLOAD(tb[TCA_FQ_BUCKETS_LOG]) >= sizeof(__u32)) {
a4b897
+		buckets_log = rta_getattr_u32(tb[TCA_FQ_BUCKETS_LOG]);
a4b897
+		fprintf(f, "buckets %u ", 1U << buckets_log); /* value is a log2; NOTE(review): shift is undefined if it reaches the width of unsigned int - confirm it is bounded */
a4b897
+	}
a4b897
+	if (tb[TCA_FQ_RATE_ENABLE] &&
a4b897
+	    RTA_PAYLOAD(tb[TCA_FQ_RATE_ENABLE]) >= sizeof(int)) {
a4b897
+		pacing = rta_getattr_u32(tb[TCA_FQ_RATE_ENABLE]);
a4b897
+		if (pacing == 0) /* a non-zero (enabled) value prints nothing */
a4b897
+			fprintf(f, "nopacing ");
a4b897
+	}
a4b897
+	if (tb[TCA_FQ_QUANTUM] &&
a4b897
+	    RTA_PAYLOAD(tb[TCA_FQ_QUANTUM]) >= sizeof(__u32)) {
a4b897
+		quantum = rta_getattr_u32(tb[TCA_FQ_QUANTUM]);
a4b897
+		fprintf(f, "quantum %u ", quantum);
a4b897
+	}
a4b897
+	if (tb[TCA_FQ_INITIAL_QUANTUM] &&
a4b897
+	    RTA_PAYLOAD(tb[TCA_FQ_INITIAL_QUANTUM]) >= sizeof(__u32)) {
a4b897
+		quantum = rta_getattr_u32(tb[TCA_FQ_INITIAL_QUANTUM]); /* the quantum local is reused for the initial quantum */
a4b897
+		fprintf(f, "initial_quantum %u ", quantum);
a4b897
+	}
a4b897
+	if (tb[TCA_FQ_FLOW_MAX_RATE] &&
a4b897
+	    RTA_PAYLOAD(tb[TCA_FQ_FLOW_MAX_RATE]) >= sizeof(__u32)) {
a4b897
+		rate = rta_getattr_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
a4b897
+
a4b897
+		if (rate != ~0U) /* ~0U is not printed; presumably it means unlimited - confirm */
a4b897
+			fprintf(f, "maxrate %s ", sprint_rate(rate, b1));
a4b897
+	}
a4b897
+	if (tb[TCA_FQ_FLOW_DEFAULT_RATE] &&
a4b897
+	    RTA_PAYLOAD(tb[TCA_FQ_FLOW_DEFAULT_RATE]) >= sizeof(__u32)) {
a4b897
+		rate = rta_getattr_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]);
a4b897
+
a4b897
+		if (rate != 0) /* a zero default rate is not printed */
a4b897
+			fprintf(f, "defrate %s ", sprint_rate(rate, b1));
a4b897
+	}
a4b897
+
a4b897
+	return 0;
a4b897
+}
a4b897
+
a4b897
+static int fq_print_xstats(struct qdisc_util *qu, FILE *f,
a4b897
+			   struct rtattr *xstats)
a4b897
+{ /* Print the FQ statistics carried in xstats to f; returns -1 if the payload is shorter than struct tc_fq_qd_stats, otherwise 0 (also when xstats is NULL). */
a4b897
+	struct tc_fq_qd_stats *st;
a4b897
+
a4b897
+	if (xstats == NULL)
a4b897
+		return 0;
a4b897
+
a4b897
+	if (RTA_PAYLOAD(xstats) < sizeof(*st))
a4b897
+		return -1;
a4b897
+
a4b897
+	st = RTA_DATA(xstats);
a4b897
+
a4b897
+	fprintf(f, "  %u flows (%u inactive, %u throttled)",
a4b897
+		st->flows, st->inactive_flows, st->throttled_flows);
a4b897
+
a4b897
+	if (st->time_next_delayed_flow > 0) /* shown only when positive */
a4b897
+		fprintf(f, ", next packet delay %llu ns", st->time_next_delayed_flow);
a4b897
+
a4b897
+	fprintf(f, "\n  %llu gc, %llu highprio",
a4b897
+		st->gc_flows, st->highprio_packets);
a4b897
+
a4b897
+	if (st->tcp_retrans) /* shown only when non-zero */
a4b897
+		fprintf(f, ", %llu retrans", st->tcp_retrans);
a4b897
+
a4b897
+	fprintf(f, ", %llu throttled", st->throttled);
a4b897
+
a4b897
+	if (st->flows_plimit) /* shown only when non-zero */
a4b897
+		fprintf(f, ", %llu flows_plimit", st->flows_plimit);
a4b897
+
a4b897
+	if (st->pkts_too_long || st->allocation_errors) /* extra line, and the only trailing newline, when either counter is non-zero */
a4b897
+		fprintf(f, "\n  %llu too long pkts, %llu alloc errors\n",
a4b897
+			st->pkts_too_long, st->allocation_errors);
a4b897
+
a4b897
+	return 0;
a4b897
+}
a4b897
+
a4b897
+struct qdisc_util fq_qdisc_util = { /* option parser and printers for the qdisc id fq */
a4b897
+	.id		= "fq",
a4b897
+	.parse_qopt	= fq_parse_opt,
a4b897
+	.print_qopt	= fq_print_opt,
a4b897
+	.print_xstats	= fq_print_xstats,
a4b897
+};
a4b897
-- 
a4b897
1.8.3.1
a4b897