blob: 13322e8a045643bf620b408453b42157b0b48659 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_sfq.c Stochastic Fairness Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 */
11
Linus Torvalds1da177e2005-04-16 15:20:36 -070012#include <linux/module.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070013#include <linux/types.h>
14#include <linux/kernel.h>
15#include <linux/jiffies.h>
16#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070017#include <linux/in.h>
18#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070019#include <linux/init.h>
Patrick McHardy0ba48052007-07-02 22:49:07 -070020#include <linux/ipv6.h>
21#include <linux/skbuff.h>
Alexey Kuznetsov32740dd2007-09-30 17:51:33 -070022#include <linux/jhash.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090023#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <net/ip.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070025#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070026#include <net/pkt_sched.h>
27
28
29/* Stochastic Fairness Queuing algorithm.
30 =======================================
31
32 Source:
33 Paul E. McKenney "Stochastic Fairness Queuing",
34 IEEE INFOCOMM'90 Proceedings, San Francisco, 1990.
35
36 Paul E. McKenney "Stochastic Fairness Queuing",
37 "Interworking: Research and Experience", v.2, 1991, p.113-131.
38
39
40 See also:
41 M. Shreedhar and George Varghese "Efficient Fair
42 Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
43
44
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090045 This is not the thing that is usually called (W)FQ nowadays.
Linus Torvalds1da177e2005-04-16 15:20:36 -070046 It does not use any timestamp mechanism, but instead
47 processes queues in round-robin order.
48
49 ADVANTAGE:
50
51 - It is very cheap. Both CPU and memory requirements are minimal.
52
53 DRAWBACKS:
54
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090055 - "Stochastic" -> It is not 100% fair.
Linus Torvalds1da177e2005-04-16 15:20:36 -070056 When hash collisions occur, several flows are considered as one.
57
58 - "Round-robin" -> It introduces larger delays than virtual clock
59 based schemes, and should not be used for isolating interactive
60 traffic from non-interactive. It means, that this scheduler
61 should be used as leaf of CBQ or P3, which put interactive traffic
62 to higher priority band.
63
64 We still need true WFQ for top level CSZ, but using WFQ
65 for the best effort traffic is absolutely pointless:
66 SFQ is superior for this purpose.
67
68 IMPLEMENTATION:
69 This implementation limits maximal queue length to 128;
Eric Dumazeteda83e32010-12-20 12:54:58 +000070 maximal mtu to 2^15-1; max 128 flows, number of hash buckets to 1024.
	The only goal of these restrictions was that all data
	fit into one 4K page on 32bit arches.
Linus Torvalds1da177e2005-04-16 15:20:36 -070073
74 It is easy to increase these values, but not in flight. */
75
Eric Dumazeteda83e32010-12-20 12:54:58 +000076#define SFQ_DEPTH 128 /* max number of packets per flow */
77#define SFQ_SLOTS 128 /* max number of flows */
78#define SFQ_EMPTY_SLOT 255
Linus Torvalds1da177e2005-04-16 15:20:36 -070079#define SFQ_HASH_DIVISOR 1024
80
Eric Dumazeteda83e32010-12-20 12:54:58 +000081/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
Linus Torvalds1da177e2005-04-16 15:20:36 -070082typedef unsigned char sfq_index;
83
Eric Dumazeteda83e32010-12-20 12:54:58 +000084/*
85 * We dont use pointers to save space.
86 * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
87 * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
88 * are 'pointers' to dep[] array
89 */
/*
 * Doubly linked list anchor for the per-depth chains.
 * next/prev are sfq_index 'pointers', resolved via sfq_dep_head():
 * values < SFQ_SLOTS address slots[], the rest address dep[].
 */
struct sfq_head
{
	sfq_index next;
	sfq_index prev;
};
95
/*
 * Per-flow state. The skb list is an intrusive circular list whose
 * head is the slot itself cast to struct sk_buff * (see slot_queue_*
 * helpers below), so an empty list points back at the slot.
 */
struct sfq_slot {
	struct sk_buff	*skblist_next; /* head of this flow's skb list */
	struct sk_buff	*skblist_prev; /* tail of this flow's skb list */
	sfq_index	qlen; /* number of skbs in skblist */
	sfq_index	next; /* next slot in sfq chain */
	struct sfq_head dep; /* anchor in dep[] chains */
	unsigned short	hash; /* hash value (index in ht[]) */
	short		allot; /* credit for this slot */
};
105
/* Private state of one SFQ qdisc instance (lives in qdisc_priv(sch)). */
struct sfq_sched_data
{
/* Parameters */
	int		perturb_period;	/* hash rekey interval, in jiffies (0 = never) */
	unsigned	quantum;	/* Allotment per round: MUST BE >= MTU */
	int		limit;		/* max packets per flow (<= SFQ_DEPTH - 1) */

/* Variables */
	struct tcf_proto *filter_list;	/* optional tc classifier chain */
	struct timer_list perturb_timer;/* fires sfq_perturbation() */
	u32		perturbation;	/* current jhash key */
	sfq_index	cur_depth;	/* depth of longest slot */

	struct sfq_slot	*tail;		/* current slot in round (NULL = idle) */
	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table: bucket -> slot index or SFQ_EMPTY_SLOT */
	struct sfq_slot	slots[SFQ_SLOTS];
	struct sfq_head	dep[SFQ_DEPTH];	/* Linked list of slots, indexed by depth */
};
124
Eric Dumazeteda83e32010-12-20 12:54:58 +0000125/*
126 * sfq_head are either in a sfq_slot or in dep[] array
127 */
128static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
129{
130 if (val < SFQ_SLOTS)
131 return &q->slots[val].dep;
132 return &q->dep[val - SFQ_SLOTS];
133}
134
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
136{
Alexey Kuznetsov32740dd2007-09-30 17:51:33 -0700137 return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138}
139
/*
 * Compute the flow hash for an skb: (daddr, saddr ^ proto, ports) for
 * IPv4/IPv6, falling back to (dst entry, socket) for other protocols
 * or short/unpullable headers. Returns a bucket in [0 .. SFQ_HASH_DIVISOR-1].
 */
static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
{
	u32 h, h2;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
	{
		const struct iphdr *iph;
		int poff;

		if (!pskb_network_may_pull(skb, sizeof(*iph)))
			goto err;
		iph = ip_hdr(skb);
		h = (__force u32)iph->daddr;
		h2 = (__force u32)iph->saddr ^ iph->protocol;
		/* fragments past the first carry no L4 header: skip ports */
		if (iph->frag_off & htons(IP_MF|IP_OFFSET))
			break;
		poff = proto_ports_offset(iph->protocol);
		if (poff >= 0 &&
		    pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
			/* re-read header pointer: the pull may relocate data */
			iph = ip_hdr(skb);
			h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff);
		}
		break;
	}
	case htons(ETH_P_IPV6):
	{
		struct ipv6hdr *iph;
		int poff;

		if (!pskb_network_may_pull(skb, sizeof(*iph)))
			goto err;
		iph = ipv6_hdr(skb);
		/* low 32 bits of the addresses are the most variable part */
		h = (__force u32)iph->daddr.s6_addr32[3];
		h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
		poff = proto_ports_offset(iph->nexthdr);
		if (poff >= 0 &&
		    pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
			iph = ipv6_hdr(skb);
			h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff);
		}
		break;
	}
	default:
err:
		/* non-IP or malformed: hash on route + originating socket */
		h = (unsigned long)skb_dst(skb) ^ (__force u32)skb->protocol;
		h2 = (unsigned long)skb->sk;
	}

	return sfq_fold_hash(q, h, h2);
}
191
/*
 * Map an skb to a class number in [1 .. SFQ_HASH_DIVISOR], or 0 to
 * signal "drop" with *qerr holding the NET_XMIT verdict. skb->priority
 * may select a bucket directly; otherwise attached filters are
 * consulted, with the flow hash as the default.
 */
static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
				 int *qerr)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct tcf_result res;
	int result;

	if (TC_H_MAJ(skb->priority) == sch->handle &&
	    TC_H_MIN(skb->priority) > 0 &&
	    TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR)
		return TC_H_MIN(skb->priority);

	if (!q->filter_list)
		return sfq_hash(q, skb) + 1;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tc_classify(skb, q->filter_list, &res);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through: stolen/queued also return 0 (no enqueue) */
		case TC_ACT_SHOT:
			return 0;
		}
#endif
		if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR)
			return TC_H_MIN(res.classid);
	}
	return 0;
}
224
Eric Dumazeteda83e32010-12-20 12:54:58 +0000225/*
226 * x : slot number [0 .. SFQ_SLOTS - 1]
227 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700228static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
229{
230 sfq_index p, n;
Eric Dumazeteda83e32010-12-20 12:54:58 +0000231 int qlen = q->slots[x].qlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232
Eric Dumazeteda83e32010-12-20 12:54:58 +0000233 p = qlen + SFQ_SLOTS;
234 n = q->dep[qlen].next;
235
236 q->slots[x].dep.next = n;
237 q->slots[x].dep.prev = p;
238
239 q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */
240 sfq_dep_head(q, n)->prev = x;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241}
242
/*
 * Remove slot x from its dep[] chain, returning its neighbours in n/p.
 * Wrapped in do { } while (0) so the multi-statement expansion behaves
 * as a single statement inside unbraced if/else bodies and consumes
 * the caller's trailing semicolon.
 */
#define sfq_unlink(q, x, n, p)			\
	do {					\
		n = q->slots[x].dep.next;	\
		p = q->slots[x].dep.prev;	\
		sfq_dep_head(q, p)->next = n;	\
		sfq_dep_head(q, n)->prev = p;	\
	} while (0)
248
249
/*
 * Account one dequeued/dropped packet on slot x: unlink it from its
 * current dep[] chain, decrement its qlen and relink one level down.
 */
static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;
	int d;

	sfq_unlink(q, x, n, p);

	/* post-decrement: d is the depth *before* this packet left */
	d = q->slots[x].qlen--;
	/* n == p means x was the only slot in chain dep[d]; if that was
	 * also the deepest chain, the maximum depth shrinks by one.
	 */
	if (n == p && q->cur_depth == d)
		q->cur_depth--;
	sfq_link(q, x);
}
262
263static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
264{
265 sfq_index p, n;
266 int d;
267
Eric Dumazeteda83e32010-12-20 12:54:58 +0000268 sfq_unlink(q, x, n, p);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269
Eric Dumazeteda83e32010-12-20 12:54:58 +0000270 d = ++q->slots[x].qlen;
271 if (q->cur_depth < d)
272 q->cur_depth = d;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273 sfq_link(q, x);
274}
275
Eric Dumazeteda83e32010-12-20 12:54:58 +0000276/* helper functions : might be changed when/if skb use a standard list_head */
277
278/* remove one skb from tail of slot queue */
279static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
280{
281 struct sk_buff *skb = slot->skblist_prev;
282
283 slot->skblist_prev = skb->prev;
284 skb->next = skb->prev = NULL;
285 return skb;
286}
287
288/* remove one skb from head of slot queue */
289static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
290{
291 struct sk_buff *skb = slot->skblist_next;
292
293 slot->skblist_next = skb->next;
294 skb->next = skb->prev = NULL;
295 return skb;
296}
297
298static inline void slot_queue_init(struct sfq_slot *slot)
299{
300 slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
301}
302
303/* add skb to slot queue (tail add) */
304static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
305{
306 skb->prev = slot->skblist_prev;
307 skb->next = (struct sk_buff *)slot;
308 slot->skblist_prev->next = skb;
309 slot->skblist_prev = skb;
310}
311
/*
 * Iterate over every skb in a slot's circular list. The terminator is
 * the slot itself cast to struct sk_buff * (the intrusive list head).
 */
#define slot_queue_walk(slot, skb)		\
	for (skb = slot->skblist_next;		\
	     skb != (struct sk_buff *)slot;	\
	     skb = skb->next)
316
/*
 * Drop one packet to relieve pressure: tail-drop from the deepest flow,
 * or, if every active flow holds exactly one packet, drop the head of
 * the next flow in the round-robin ring. Returns the dropped packet's
 * length, or 0 if nothing was queued.
 */
static unsigned int sfq_drop(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	sfq_index x, d = q->cur_depth;
	struct sk_buff *skb;
	unsigned int len;
	struct sfq_slot *slot;

	/* Queue is full! Find the longest slot and drop tail packet from it */
	if (d > 1) {
		x = q->dep[d].next;	/* first slot at the maximum depth */
		slot = &q->slots[x];
drop:
		skb = slot_dequeue_tail(slot);
		len = qdisc_pkt_len(skb);
		sfq_dec(q, x);
		kfree_skb(skb);
		sch->q.qlen--;
		sch->qstats.drops++;
		sch->qstats.backlog -= len;
		return len;
	}

	if (d == 1) {
		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
		/* unlink the victim flow from the ring and free its bucket
		 * before reusing the shared drop path above
		 */
		x = q->tail->next;
		slot = &q->slots[x];
		q->tail->next = slot->next;
		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
		goto drop;
	}

	return 0;
}
351
352static int
Stephen Hemminger6f9e98f2008-01-20 17:20:56 -0800353sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354{
355 struct sfq_sched_data *q = qdisc_priv(sch);
Patrick McHardy7d2681a2008-01-31 18:36:52 -0800356 unsigned int hash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357 sfq_index x;
Eric Dumazeteda83e32010-12-20 12:54:58 +0000358 struct sfq_slot *slot;
Jarek Poplawski7f3ff4f2008-12-21 20:14:48 -0800359 int uninitialized_var(ret);
Patrick McHardy7d2681a2008-01-31 18:36:52 -0800360
361 hash = sfq_classify(skb, sch, &ret);
362 if (hash == 0) {
Jarek Poplawskic27f3392008-08-04 22:39:11 -0700363 if (ret & __NET_XMIT_BYPASS)
Patrick McHardy7d2681a2008-01-31 18:36:52 -0800364 sch->qstats.drops++;
365 kfree_skb(skb);
366 return ret;
367 }
368 hash--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369
370 x = q->ht[hash];
Eric Dumazeteda83e32010-12-20 12:54:58 +0000371 slot = &q->slots[x];
372 if (x == SFQ_EMPTY_SLOT) {
373 x = q->dep[0].next; /* get a free slot */
374 q->ht[hash] = x;
375 slot = &q->slots[x];
376 slot->hash = hash;
377 slot_queue_init(slot);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378 }
Stephen Hemminger6f9e98f2008-01-20 17:20:56 -0800379
Eric Dumazeteda83e32010-12-20 12:54:58 +0000380 /* If selected queue has length q->limit, do simple tail drop,
Alexey Kuznetsov32740dd2007-09-30 17:51:33 -0700381 * i.e. drop _this_ packet.
382 */
Eric Dumazeteda83e32010-12-20 12:54:58 +0000383 if (slot->qlen >= q->limit)
Alexey Kuznetsov32740dd2007-09-30 17:51:33 -0700384 return qdisc_drop(skb, sch);
385
Jussi Kivilinna0abf77e2008-07-20 00:08:27 -0700386 sch->qstats.backlog += qdisc_pkt_len(skb);
Eric Dumazeteda83e32010-12-20 12:54:58 +0000387 slot_queue_add(slot, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700388 sfq_inc(q, x);
Eric Dumazeteda83e32010-12-20 12:54:58 +0000389 if (slot->qlen == 1) { /* The flow is new */
390 if (q->tail == NULL) { /* It is the first flow */
391 slot->next = x;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392 } else {
Eric Dumazeteda83e32010-12-20 12:54:58 +0000393 slot->next = q->tail->next;
394 q->tail->next = x;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 }
Eric Dumazeteda83e32010-12-20 12:54:58 +0000396 q->tail = slot;
397 slot->allot = q->quantum;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398 }
Alexey Kuznetsov5588b402007-09-19 10:42:03 -0700399 if (++sch->q.qlen <= q->limit) {
Jussi Kivilinna0abf77e2008-07-20 00:08:27 -0700400 sch->bstats.bytes += qdisc_pkt_len(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 sch->bstats.packets++;
Ben Greear9871e502010-08-10 01:45:40 -0700402 return NET_XMIT_SUCCESS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403 }
404
405 sfq_drop(sch);
406 return NET_XMIT_CN;
407}
408
Patrick McHardy48a8f512008-10-31 00:44:18 -0700409static struct sk_buff *
410sfq_peek(struct Qdisc *sch)
411{
412 struct sfq_sched_data *q = qdisc_priv(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413
Patrick McHardy48a8f512008-10-31 00:44:18 -0700414 /* No active slots */
Eric Dumazeteda83e32010-12-20 12:54:58 +0000415 if (q->tail == NULL)
Patrick McHardy48a8f512008-10-31 00:44:18 -0700416 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417
Eric Dumazeteda83e32010-12-20 12:54:58 +0000418 return q->slots[q->tail->next].skblist_next;
Patrick McHardy48a8f512008-10-31 00:44:18 -0700419}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420
/*
 * Dequeue one packet from the flow after the ring tail. Empty flows are
 * unlinked from the ring; a flow that exhausts its byte allotment is
 * moved to the ring tail and refilled with one quantum (deficit
 * round-robin).
 */
static struct sk_buff *
sfq_dequeue(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	sfq_index a, next_a;
	struct sfq_slot *slot;

	/* No active slots */
	if (q->tail == NULL)
		return NULL;

	a = q->tail->next;
	slot = &q->slots[a];
	skb = slot_dequeue_head(slot);
	sfq_dec(q, a);
	sch->q.qlen--;
	sch->qstats.backlog -= qdisc_pkt_len(skb);

	/* Is the slot empty? */
	if (slot->qlen == 0) {
		/* release the hash bucket and unlink the flow from the ring */
		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
		next_a = slot->next;
		if (a == next_a) {
			q->tail = NULL; /* no more active slots */
			return skb;
		}
		q->tail->next = next_a;
	} else if ((slot->allot -= qdisc_pkt_len(skb)) <= 0) {
		/* allotment spent: rotate this flow to the ring tail and
		 * carry the (negative) remainder into the next quantum
		 */
		q->tail = slot;
		slot->allot += q->quantum;
	}
	return skb;
}
455
456static void
Stephen Hemminger6f9e98f2008-01-20 17:20:56 -0800457sfq_reset(struct Qdisc *sch)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700458{
459 struct sk_buff *skb;
460
461 while ((skb = sfq_dequeue(sch)) != NULL)
462 kfree_skb(skb);
463}
464
/*
 * Timer callback: pick a fresh hash perturbation key so long-lived
 * hash collisions between flows do not persist, then re-arm unless
 * perturbation has been disabled meanwhile.
 */
static void sfq_perturbation(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;
	struct sfq_sched_data *q = qdisc_priv(sch);

	q->perturbation = net_random();

	if (q->perturb_period)
		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
}
475
/*
 * Apply tc parameters (quantum, perturb period, limit) from a netlink
 * attribute. Trims excess packets if the new limit is below the current
 * queue length, and restarts the perturbation timer as needed.
 * Returns 0 or -EINVAL on a malformed attribute.
 */
static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct tc_sfq_qopt *ctl = nla_data(opt);
	unsigned int qlen;

	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
		return -EINVAL;

	sch_tree_lock(sch);
	/* quantum == 0 means "use the device MTU" */
	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
	q->perturb_period = ctl->perturb_period * HZ;
	if (ctl->limit)
		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);

	/* shrink the queue to the (possibly lower) new limit */
	qlen = sch->q.qlen;
	while (sch->q.qlen > q->limit)
		sfq_drop(sch);
	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);

	del_timer(&q->perturb_timer);
	if (q->perturb_period) {
		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
		q->perturbation = net_random();
	}
	sch_tree_unlock(sch);
	return 0;
}
504
/*
 * Initialise a new SFQ instance: empty hash table, all slots free (all
 * dep[] chains self-linked, then every slot linked at depth 0), default
 * or netlink-supplied parameters, deferrable perturbation timer.
 */
static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	int i;

	q->perturb_timer.function = sfq_perturbation;
	q->perturb_timer.data = (unsigned long)sch;
	init_timer_deferrable(&q->perturb_timer);

	for (i = 0; i < SFQ_HASH_DIVISOR; i++)
		q->ht[i] = SFQ_EMPTY_SLOT;

	/* make each dep[] chain point at itself (empty chain) */
	for (i = 0; i < SFQ_DEPTH; i++) {
		q->dep[i].next = i + SFQ_SLOTS;
		q->dep[i].prev = i + SFQ_SLOTS;
	}

	q->limit = SFQ_DEPTH - 1;
	q->cur_depth = 0;
	q->tail = NULL;
	if (opt == NULL) {
		q->quantum = psched_mtu(qdisc_dev(sch));
		q->perturb_period = 0;
		q->perturbation = net_random();
	} else {
		int err = sfq_change(sch, opt);
		if (err)
			return err;
	}

	/* all slots start with qlen 0, i.e. on the free (depth-0) chain */
	for (i = 0; i < SFQ_SLOTS; i++)
		sfq_link(q, i);
	return 0;
}
539
/* tear down filters and stop the perturbation timer */
static void sfq_destroy(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);

	tcf_destroy_chain(&q->filter_list);
	/* clear the period first so a concurrently running timer callback
	 * cannot re-arm itself before del_timer_sync() completes
	 */
	q->perturb_period = 0;
	del_timer_sync(&q->perturb_timer);
}
548
/*
 * Dump the qdisc parameters as a TCA_OPTIONS netlink attribute.
 * Returns the new message length, or -1 (message trimmed back) when
 * the skb runs out of room — NLA_PUT jumps to nla_put_failure.
 */
static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_sfq_qopt opt;

	opt.quantum = q->quantum;
	opt.perturb_period = q->perturb_period / HZ;

	opt.limit = q->limit;
	opt.divisor = SFQ_HASH_DIVISOR;
	opt.flows = q->limit;

	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);

	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
570
/* SFQ classes are internal flows with no child qdisc */
static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
{
	return NULL;
}
575
/* classes are not reference-counted; lookup always "fails" with 0 */
static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
{
	return 0;
}
580
/* filters may bind to any class; nothing to track, so return 0 */
static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
			      u32 classid)
{
	return 0;
}
586
/* no per-class refcount to release (see sfq_get/sfq_bind) */
static void sfq_put(struct Qdisc *q, unsigned long cl)
{
}
590
Patrick McHardy7d2681a2008-01-31 18:36:52 -0800591static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl)
592{
593 struct sfq_sched_data *q = qdisc_priv(sch);
594
595 if (cl)
596 return NULL;
597 return &q->filter_list;
598}
599
/* fill in the class handle (minor id == 1-based bucket index) */
static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	tcm->tcm_handle |= TC_H_MIN(cl);
	return 0;
}
606
607static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
608 struct gnet_dump *d)
609{
610 struct sfq_sched_data *q = qdisc_priv(sch);
Eric Dumazeteda83e32010-12-20 12:54:58 +0000611 const struct sfq_slot *slot = &q->slots[q->ht[cl - 1]];
612 struct gnet_stats_queue qs = { .qlen = slot->qlen };
613 struct tc_sfq_xstats xstats = { .allot = slot->allot };
Eric Dumazetc4266262010-12-15 08:18:36 +0000614 struct sk_buff *skb;
615
Eric Dumazeteda83e32010-12-20 12:54:58 +0000616 slot_queue_walk(slot, skb)
Eric Dumazetc4266262010-12-15 08:18:36 +0000617 qs.backlog += qdisc_pkt_len(skb);
Patrick McHardy94de78d2008-01-31 18:37:16 -0800618
619 if (gnet_stats_copy_queue(d, &qs) < 0)
620 return -1;
621 return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
622}
623
/*
 * Walk all active classes (non-empty hash buckets), invoking arg->fn
 * with the 1-based bucket index as the class handle. Honours the
 * skip/count/stop protocol of struct qdisc_walker.
 */
static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
		/* skip empty buckets and the first arg->skip entries */
		if (q->ht[i] == SFQ_EMPTY_SLOT ||
		    arg->count < arg->skip) {
			arg->count++;
			continue;
		}
		if (arg->fn(sch, i + 1, arg) < 0) {
			arg->stop = 1;
			break;
		}
		arg->count++;
	}
}
645
/* class operations: SFQ classes are read-only views of hash buckets */
static const struct Qdisc_class_ops sfq_class_ops = {
	.leaf		=	sfq_leaf,
	.get		=	sfq_get,
	.put		=	sfq_put,
	.tcf_chain	=	sfq_find_tcf,
	.bind_tcf	=	sfq_bind,
	.unbind_tcf	=	sfq_put,	/* unbind is a no-op, same as put */
	.dump		=	sfq_dump_class,
	.dump_stats	=	sfq_dump_class_stats,
	.walk		=	sfq_walk,
};
657
/* qdisc operations table registered with the packet scheduler core */
static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
	.cl_ops		=	&sfq_class_ops,
	.id		=	"sfq",
	.priv_size	=	sizeof(struct sfq_sched_data),
	.enqueue	=	sfq_enqueue,
	.dequeue	=	sfq_dequeue,
	.peek		=	sfq_peek,
	.drop		=	sfq_drop,
	.init		=	sfq_init,
	.reset		=	sfq_reset,
	.destroy	=	sfq_destroy,
	.change		=	NULL,	/* runtime change unsupported; see sfq_init */
	.dump		=	sfq_dump,
	.owner		=	THIS_MODULE,
};
673
/* module entry point: register the "sfq" qdisc with the scheduler core */
static int __init sfq_module_init(void)
{
	return register_qdisc(&sfq_qdisc_ops);
}
/* module exit point: unregister the "sfq" qdisc */
static void __exit sfq_module_exit(void)
{
	unregister_qdisc(&sfq_qdisc_ops);
}
682module_init(sfq_module_init)
683module_exit(sfq_module_exit)
684MODULE_LICENSE("GPL");