/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
#include <net/netlink.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/pkt_sched.h>


/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		 Management Models for Packet Networks",
		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		 Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		 for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	The algorithm skeleton was taken from the NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	they should take into account that ONLY the skeleton was borrowed;
	the implementation is different. Particularly:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real-time classes
	have small rates. Note that the statement in [3] is
	incomplete: delay may actually be estimated even if the class
	per-round allotment is less than MTU. Namely, if the per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.

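	A rough worked example, with made-up numbers (reading [x] above as
	rounding up): take MTU = 1500 bytes, k = 3 classes whose per-round
	allotments sum to W*r = 1000 bytes, and class i's allotment
	W*r_i = 500 bytes. Then [1500/500] = 3 and
	delay_i <= (3*1000 + 1000 + 3*1500)/B = 8500 bytes / B, i.e. roughly
	6.8 ms on a 10 Mbit/s link. Illustrative only; the bound itself is the
	authors', the numbers are not.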

	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so we cannot estimate the true class
	idle time. The workaround is to consider the next dequeue event
	as a sign that the previous packet has finished. This is wrong because of
	internal device queueing, but on a permanently loaded link it is true.
	Moreover, combined with the clock integrator, this scheme looks
	very close to an ideal solution. */

struct cbq_sched_data;


struct cbq_class
{
	struct cbq_class	*next;		/* hash table link */
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	u32			classid;
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_POLICE
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic	bstats;
	struct gnet_stats_queue	qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX+1];
};
160
struct cbq_sched_data
{
	struct cbq_class	*classes[16];		/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];
	unsigned		quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class	link;

	unsigned		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_POLICE
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;		/* Cached timestamp */
	psched_time_t		now_rt;		/* Cached real time */
	unsigned		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};
192
193
#define L2T(cl,len)	((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])


static __inline__ unsigned cbq_hash(u32 h)
{
	h ^= h>>8;
	h ^= h>>4;
	return h&0xF;
}
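
/* Notes for readers (added commentary, not from the original authors):
 * L2T ("length to time") looks up the precomputed transmission time, in
 * psched time units, of a packet of the given length at the class rate;
 * the length is first quantized by cell_log.  Assuming cell_log = 3, a
 * 1000-byte packet indexes R_tab->data[1000 >> 3] = data[125].
 * cbq_hash() folds a 32-bit classid into one of 16 buckets, e.g.
 * 0x00010001 -> 0x00010101 -> 0x00011111 -> bucket 1.
 */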
203
204static __inline__ struct cbq_class *
205cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
206{
207 struct cbq_class *cl;
208
209 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
210 if (cl->classid == classid)
211 return cl;
212 return NULL;
213}
214
215#ifdef CONFIG_NET_CLS_POLICE
216
217static struct cbq_class *
218cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
219{
220 struct cbq_class *cl, *new;
221
222 for (cl = this->tparent; cl; cl = cl->tparent)
223 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
224 return new;
225
226 return NULL;
227}
228
229#endif
230
/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link-sharing rules (e.g. route-based) at the root of
   CBQ, so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */
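
/* Added illustration (not from the original authors): with a root filter
 * that only resolves packets to per-agency inner classes, classification
 * walks downward: the root filter picks the agency class, then that
 * class's own filters -- or, failing a match, its defmap indexed by the
 * packet's logical priority -- pick the next class, and so on until
 * cl->level is 0.  Only then is the leaf returned to the enqueue path.
 */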
240
241static struct cbq_class *
242cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
243{
244 struct cbq_sched_data *q = qdisc_priv(sch);
245 struct cbq_class *head = &q->link;
246 struct cbq_class **defmap;
247 struct cbq_class *cl = NULL;
248 u32 prio = skb->priority;
249 struct tcf_result res;
250
251 /*
252 * Step 1. If skb->priority points to one of our classes, use it.
253 */
254 if (TC_H_MAJ(prio^sch->handle) == 0 &&
255 (cl = cbq_class_lookup(q, prio)) != NULL)
256 return cl;
257
Jamal Hadi Salim29f1df62006-01-08 22:35:55 -0800258 *qerr = NET_XMIT_BYPASS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 for (;;) {
260 int result = 0;
261 defmap = head->defaults;
262
263 /*
264 * Step 2+n. Apply classifier.
265 */
266 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
267 goto fallback;
268
269 if ((cl = (void*)res.class) == NULL) {
270 if (TC_H_MAJ(res.classid))
271 cl = cbq_class_lookup(q, res.classid);
272 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
273 cl = defmap[TC_PRIO_BESTEFFORT];
274
275 if (cl == NULL || cl->level >= head->level)
276 goto fallback;
277 }
278
279#ifdef CONFIG_NET_CLS_ACT
280 switch (result) {
281 case TC_ACT_QUEUED:
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900282 case TC_ACT_STOLEN:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 *qerr = NET_XMIT_SUCCESS;
284 case TC_ACT_SHOT:
285 return NULL;
286 }
287#elif defined(CONFIG_NET_CLS_POLICE)
288 switch (result) {
289 case TC_POLICE_RECLASSIFY:
290 return cbq_reclassify(skb, cl);
291 case TC_POLICE_SHOT:
292 return NULL;
293 default:
294 break;
295 }
296#endif
297 if (cl->level == 0)
298 return cl;
299
		/*
		 * Step 3+n. If classifier selected a link sharing class,
		 * apply agency-specific classifier.
		 * Repeat this procedure until we hit a leaf node.
		 */
305 head = cl;
306 }
307
308fallback:
309 cl = head;
310
311 /*
312 * Step 4. No success...
313 */
314 if (TC_H_MAJ(prio) == 0 &&
315 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
316 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
317 return head;
318
319 return cl;
320}
321
/*
   A packet has just been enqueued on an empty class.
   cbq_activate_class adds it to the tail of the active class list
   of its priority band.
 */
327
328static __inline__ void cbq_activate_class(struct cbq_class *cl)
329{
330 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
331 int prio = cl->cpriority;
332 struct cbq_class *cl_tail;
333
334 cl_tail = q->active[prio];
335 q->active[prio] = cl;
336
337 if (cl_tail != NULL) {
338 cl->next_alive = cl_tail->next_alive;
339 cl_tail->next_alive = cl;
340 } else {
341 cl->next_alive = cl;
342 q->activemask |= (1<<prio);
343 }
344}
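
/* Added note: q->active[prio] points at the most recently activated class,
 * i.e. the tail of a circular next_alive ring; the round-robin walk in
 * cbq_dequeue_prio() therefore starts from q->active[prio]->next_alive,
 * which is the head of the ring.
 */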
345
346/*
347 Unlink class from active chain.
348 Note that this same procedure is done directly in cbq_dequeue*
349 during round-robin procedure.
350 */
351
352static void cbq_deactivate_class(struct cbq_class *this)
353{
354 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
355 int prio = this->cpriority;
356 struct cbq_class *cl;
357 struct cbq_class *cl_prev = q->active[prio];
358
359 do {
360 cl = cl_prev->next_alive;
361 if (cl == this) {
362 cl_prev->next_alive = cl->next_alive;
363 cl->next_alive = NULL;
364
365 if (cl == q->active[prio]) {
366 q->active[prio] = cl_prev;
367 if (cl == q->active[prio]) {
368 q->active[prio] = NULL;
369 q->activemask &= ~(1<<prio);
370 return;
371 }
372 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373 return;
374 }
375 } while ((cl_prev = cl) != q->active[prio]);
376}
377
378static void
379cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
380{
381 int toplevel = q->toplevel;
382
383 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
384 psched_time_t now;
385 psched_tdiff_t incr;
386
Patrick McHardy3bebcda2007-03-23 11:29:25 -0700387 now = psched_get_time();
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700388 incr = now - q->now_rt;
Patrick McHardy7c59e252007-03-23 11:27:45 -0700389 now = q->now + incr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390
391 do {
Patrick McHardy104e0872007-03-23 11:28:07 -0700392 if (cl->undertime < now) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393 q->toplevel = cl->level;
394 return;
395 }
396 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
397 }
398}
399
400static int
401cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
402{
403 struct cbq_sched_data *q = qdisc_priv(sch);
404 int len = skb->len;
405 int ret;
406 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
407
408#ifdef CONFIG_NET_CLS_POLICE
409 q->rx_class = cl;
410#endif
411 if (cl == NULL) {
Jamal Hadi Salim29f1df62006-01-08 22:35:55 -0800412 if (ret == NET_XMIT_BYPASS)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413 sch->qstats.drops++;
414 kfree_skb(skb);
415 return ret;
416 }
417
418#ifdef CONFIG_NET_CLS_POLICE
419 cl->q->__parent = sch;
420#endif
421 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
422 sch->q.qlen++;
423 sch->bstats.packets++;
424 sch->bstats.bytes+=len;
425 cbq_mark_toplevel(q, cl);
426 if (!cl->next_alive)
427 cbq_activate_class(cl);
428 return ret;
429 }
430
431 sch->qstats.drops++;
432 cbq_mark_toplevel(q, cl);
433 cl->qstats.drops++;
434 return ret;
435}
436
437static int
438cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
439{
440 struct cbq_sched_data *q = qdisc_priv(sch);
441 struct cbq_class *cl;
442 int ret;
443
444 if ((cl = q->tx_class) == NULL) {
445 kfree_skb(skb);
446 sch->qstats.drops++;
447 return NET_XMIT_CN;
448 }
449 q->tx_class = NULL;
450
451 cbq_mark_toplevel(q, cl);
452
453#ifdef CONFIG_NET_CLS_POLICE
454 q->rx_class = cl;
455 cl->q->__parent = sch;
456#endif
457 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
458 sch->q.qlen++;
459 sch->qstats.requeues++;
460 if (!cl->next_alive)
461 cbq_activate_class(cl);
462 return 0;
463 }
464 sch->qstats.drops++;
465 cl->qstats.drops++;
466 return ret;
467}
468
469/* Overlimit actions */
470
471/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
472
473static void cbq_ovl_classic(struct cbq_class *cl)
474{
475 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700476 psched_tdiff_t delay = cl->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700477
478 if (!cl->delayed) {
479 delay += cl->offtime;
480
		/*
		   The class goes to sleep, so that it will have no
		   chance to work off its avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a bug in this place: apparently they
		   forgot to shift the term by cl->ewma_log.
		 */
488 if (cl->avgidle < 0)
489 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
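		/* Added worked example (illustrative numbers): with
		 * ewma_log = 5 (W = 1/32) and avgidle = -8000, the line above
		 * subtracts 8000 - (8000 >> 5) = 7750, i.e. it adds
		 * (1 - W) * avgidle to the delay, as the scaled EWMA requires.
		 */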
490 if (cl->avgidle < cl->minidle)
491 cl->avgidle = cl->minidle;
492 if (delay <= 0)
493 delay = 1;
Patrick McHardy7c59e252007-03-23 11:27:45 -0700494 cl->undertime = q->now + delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495
496 cl->xstats.overactions++;
497 cl->delayed = 1;
498 }
499 if (q->wd_expires == 0 || q->wd_expires > delay)
500 q->wd_expires = delay;
501
502 /* Dirty work! We must schedule wakeups based on
503 real available rate, rather than leaf rate,
504 which may be tiny (even zero).
505 */
506 if (q->toplevel == TC_CBQ_MAXLEVEL) {
507 struct cbq_class *b;
508 psched_tdiff_t base_delay = q->wd_expires;
509
510 for (b = cl->borrow; b; b = b->borrow) {
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700511 delay = b->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512 if (delay < base_delay) {
513 if (delay <= 0)
514 delay = 1;
515 base_delay = delay;
516 }
517 }
518
519 q->wd_expires = base_delay;
520 }
521}
522
523/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
524 they go overlimit
525 */
526
527static void cbq_ovl_rclassic(struct cbq_class *cl)
528{
529 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
530 struct cbq_class *this = cl;
531
532 do {
533 if (cl->level > q->toplevel) {
534 cl = NULL;
535 break;
536 }
537 } while ((cl = cl->borrow) != NULL);
538
539 if (cl == NULL)
540 cl = this;
541 cbq_ovl_classic(cl);
542}
543
544/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
545
546static void cbq_ovl_delay(struct cbq_class *cl)
547{
548 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700549 psched_tdiff_t delay = cl->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550
551 if (!cl->delayed) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700552 psched_time_t sched = q->now;
553 ktime_t expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554
555 delay += cl->offtime;
556 if (cl->avgidle < 0)
557 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
558 if (cl->avgidle < cl->minidle)
559 cl->avgidle = cl->minidle;
Patrick McHardy7c59e252007-03-23 11:27:45 -0700560 cl->undertime = q->now + delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561
562 if (delay > 0) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700563 sched += delay + cl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700564 cl->penalized = sched;
565 cl->cpriority = TC_CBQ_MAXPRIO;
566 q->pmask |= (1<<TC_CBQ_MAXPRIO);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700567
568 expires = ktime_set(0, 0);
569 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
570 if (hrtimer_try_to_cancel(&q->delay_timer) &&
571 ktime_to_ns(ktime_sub(q->delay_timer.expires,
572 expires)) > 0)
573 q->delay_timer.expires = expires;
574 hrtimer_restart(&q->delay_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575 cl->delayed = 1;
576 cl->xstats.overactions++;
577 return;
578 }
579 delay = 1;
580 }
581 if (q->wd_expires == 0 || q->wd_expires > delay)
582 q->wd_expires = delay;
583}
584
585/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
586
587static void cbq_ovl_lowprio(struct cbq_class *cl)
588{
589 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
590
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700591 cl->penalized = q->now + cl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592
593 if (cl->cpriority != cl->priority2) {
594 cl->cpriority = cl->priority2;
595 q->pmask |= (1<<cl->cpriority);
596 cl->xstats.overactions++;
597 }
598 cbq_ovl_classic(cl);
599}
600
601/* TC_CBQ_OVL_DROP: penalize class by dropping */
602
603static void cbq_ovl_drop(struct cbq_class *cl)
604{
605 if (cl->q->ops->drop)
606 if (cl->q->ops->drop(cl->q))
607 cl->qdisc->q.qlen--;
608 cl->xstats.overactions++;
609 cbq_ovl_classic(cl);
610}
611
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700612static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
613 psched_time_t now)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614{
615 struct cbq_class *cl;
616 struct cbq_class *cl_prev = q->active[prio];
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700617 psched_time_t sched = now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618
619 if (cl_prev == NULL)
Patrick McHardye9054a32007-03-16 01:21:40 -0700620 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621
622 do {
623 cl = cl_prev->next_alive;
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700624 if (now - cl->penalized > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700625 cl_prev->next_alive = cl->next_alive;
626 cl->next_alive = NULL;
627 cl->cpriority = cl->priority;
628 cl->delayed = 0;
629 cbq_activate_class(cl);
630
631 if (cl == q->active[prio]) {
632 q->active[prio] = cl_prev;
633 if (cl == q->active[prio]) {
634 q->active[prio] = NULL;
635 return 0;
636 }
637 }
638
639 cl = cl_prev->next_alive;
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700640 } else if (sched - cl->penalized > 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641 sched = cl->penalized;
642 } while ((cl_prev = cl) != q->active[prio]);
643
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700644 return sched - now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645}
646
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700647static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648{
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700649 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
650 delay_timer);
651 struct Qdisc *sch = q->watchdog.qdisc;
652 psched_time_t now;
653 psched_tdiff_t delay = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 unsigned pmask;
655
Patrick McHardy3bebcda2007-03-23 11:29:25 -0700656 now = psched_get_time();
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700657
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658 pmask = q->pmask;
659 q->pmask = 0;
660
661 while (pmask) {
662 int prio = ffz(~pmask);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700663 psched_tdiff_t tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700664
665 pmask &= ~(1<<prio);
666
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700667 tmp = cbq_undelay_prio(q, prio, now);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 if (tmp > 0) {
669 q->pmask |= 1<<prio;
670 if (tmp < delay || delay == 0)
671 delay = tmp;
672 }
673 }
674
675 if (delay) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700676 ktime_t time;
677
678 time = ktime_set(0, 0);
679 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
680 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681 }
682
683 sch->flags &= ~TCQ_F_THROTTLED;
684 netif_schedule(sch->dev);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700685 return HRTIMER_NORESTART;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686}
687
688
689#ifdef CONFIG_NET_CLS_POLICE
690
691static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
692{
693 int len = skb->len;
694 struct Qdisc *sch = child->__parent;
695 struct cbq_sched_data *q = qdisc_priv(sch);
696 struct cbq_class *cl = q->rx_class;
697
698 q->rx_class = NULL;
699
700 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
701
702 cbq_mark_toplevel(q, cl);
703
704 q->rx_class = cl;
705 cl->q->__parent = sch;
706
707 if (cl->q->enqueue(skb, cl->q) == 0) {
708 sch->q.qlen++;
709 sch->bstats.packets++;
710 sch->bstats.bytes+=len;
711 if (!cl->next_alive)
712 cbq_activate_class(cl);
713 return 0;
714 }
715 sch->qstats.drops++;
716 return 0;
717 }
718
719 sch->qstats.drops++;
720 return -1;
721}
722#endif
723
/*
   This is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and is not regulated. It is not part of the
   original CBQ description, but looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
*/
732
733static __inline__ void
734cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
735 struct cbq_class *borrowed)
736{
737 if (cl && q->toplevel >= borrowed->level) {
738 if (cl->q->q.qlen > 1) {
739 do {
Patrick McHardya0849802007-03-23 11:28:30 -0700740 if (borrowed->undertime == PSCHED_PASTPERFECT) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700741 q->toplevel = borrowed->level;
742 return;
743 }
744 } while ((borrowed=borrowed->borrow) != NULL);
745 }
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900746#if 0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747 /* It is not necessary now. Uncommenting it
748 will save CPU cycles, but decrease fairness.
749 */
750 q->toplevel = TC_CBQ_MAXLEVEL;
751#endif
752 }
753}
754
755static void
756cbq_update(struct cbq_sched_data *q)
757{
758 struct cbq_class *this = q->tx_class;
759 struct cbq_class *cl = this;
760 int len = q->tx_len;
761
762 q->tx_class = NULL;
763
764 for ( ; cl; cl = cl->share) {
765 long avgidle = cl->avgidle;
766 long idle;
767
768 cl->bstats.packets++;
769 cl->bstats.bytes += len;
770
		/*
		   (now - last) is total time between packet right edges.
		   (last_pktlen/rate) is "virtual" busy time, so that

			 idle = (now - last) - last_pktlen/rate
		 */
777
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700778 idle = q->now - cl->last;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779 if ((unsigned long)idle > 128*1024*1024) {
780 avgidle = cl->maxidle;
781 } else {
782 idle -= L2T(cl, len);
783
784 /* true_avgidle := (1-W)*true_avgidle + W*idle,
785 where W=2^{-ewma_log}. But cl->avgidle is scaled:
786 cl->avgidle == true_avgidle/W,
787 hence:
788 */
789 avgidle += idle - (avgidle>>cl->ewma_log);
790 }
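		/* Added worked example (illustrative numbers): with
		 * ewma_log = 5 (W = 1/32), a scaled cl->avgidle of 3200
		 * (true_avgidle = 100) and idle = 64, the update gives
		 * 3200 + 64 - (3200 >> 5) = 3164, i.e. a true_avgidle of
		 * 3164/32 = 98.875 = (1 - 1/32)*100 + (1/32)*64, as expected.
		 */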
791
792 if (avgidle <= 0) {
793 /* Overlimit or at-limit */
794
795 if (avgidle < cl->minidle)
796 avgidle = cl->minidle;
797
798 cl->avgidle = avgidle;
799
800 /* Calculate expected time, when this class
801 will be allowed to send.
802 It will occur, when:
803 (1-W)*true_avgidle + W*delay = 0, i.e.
804 idle = (1/W - 1)*(-true_avgidle)
805 or
806 idle = (1 - W)*(-cl->avgidle);
807 */
808 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
809
810 /*
811 That is not all.
812 To maintain the rate allocated to the class,
813 we add to undertime virtual clock,
814 necessary to complete transmitted packet.
815 (len/phys_bandwidth has been already passed
816 to the moment of cbq_update)
817 */
818
819 idle -= L2T(&q->link, len);
820 idle += L2T(cl, len);
821
Patrick McHardy7c59e252007-03-23 11:27:45 -0700822 cl->undertime = q->now + idle;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 } else {
824 /* Underlimit */
825
Patrick McHardya0849802007-03-23 11:28:30 -0700826 cl->undertime = PSCHED_PASTPERFECT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827 if (avgidle > cl->maxidle)
828 cl->avgidle = cl->maxidle;
829 else
830 cl->avgidle = avgidle;
831 }
832 cl->last = q->now;
833 }
834
835 cbq_update_toplevel(q, this, q->tx_borrowed);
836}
837
838static __inline__ struct cbq_class *
839cbq_under_limit(struct cbq_class *cl)
840{
841 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
842 struct cbq_class *this_cl = cl;
843
844 if (cl->tparent == NULL)
845 return cl;
846
Patrick McHardya0849802007-03-23 11:28:30 -0700847 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848 cl->delayed = 0;
849 return cl;
850 }
851
852 do {
		/* This is a very suspicious place. Now the overlimit
		   action is generated for non-bounded classes
		   only if the link is completely congested.
		   Though this agrees with the ancestor-only paradigm,
		   it looks very stupid. Particularly,
		   it means that this chunk of code will either
		   never be called or result in strong amplification
		   of burstiness. Dangerous, silly, and, however,
		   no other solution exists.
		 */
863 if ((cl = cl->borrow) == NULL) {
864 this_cl->qstats.overlimits++;
865 this_cl->overlimit(this_cl);
866 return NULL;
867 }
868 if (cl->level > q->toplevel)
869 return NULL;
Patrick McHardya0849802007-03-23 11:28:30 -0700870 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871
872 cl->delayed = 0;
873 return cl;
874}
875
876static __inline__ struct sk_buff *
877cbq_dequeue_prio(struct Qdisc *sch, int prio)
878{
879 struct cbq_sched_data *q = qdisc_priv(sch);
880 struct cbq_class *cl_tail, *cl_prev, *cl;
881 struct sk_buff *skb;
882 int deficit;
883
884 cl_tail = cl_prev = q->active[prio];
885 cl = cl_prev->next_alive;
886
887 do {
888 deficit = 0;
889
890 /* Start round */
891 do {
892 struct cbq_class *borrow = cl;
893
894 if (cl->q->q.qlen &&
895 (borrow = cbq_under_limit(cl)) == NULL)
896 goto skip_class;
897
898 if (cl->deficit <= 0) {
899 /* Class exhausted its allotment per
900 this round. Switch to the next one.
901 */
902 deficit = 1;
903 cl->deficit += cl->quantum;
904 goto next_class;
905 }
906
907 skb = cl->q->dequeue(cl->q);
908
			/* Class did not give us any skb :-(
			   It could occur even if cl->q->q.qlen != 0,
			   e.g. if cl->q == "tbf"
			 */
913 if (skb == NULL)
914 goto skip_class;
915
916 cl->deficit -= skb->len;
917 q->tx_class = cl;
918 q->tx_borrowed = borrow;
919 if (borrow != cl) {
920#ifndef CBQ_XSTATS_BORROWS_BYTES
921 borrow->xstats.borrows++;
922 cl->xstats.borrows++;
923#else
924 borrow->xstats.borrows += skb->len;
925 cl->xstats.borrows += skb->len;
926#endif
927 }
928 q->tx_len = skb->len;
929
930 if (cl->deficit <= 0) {
931 q->active[prio] = cl;
932 cl = cl->next_alive;
933 cl->deficit += cl->quantum;
934 }
935 return skb;
936
937skip_class:
938 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
939 /* Class is empty or penalized.
940 Unlink it from active chain.
941 */
942 cl_prev->next_alive = cl->next_alive;
943 cl->next_alive = NULL;
944
945 /* Did cl_tail point to it? */
946 if (cl == cl_tail) {
947 /* Repair it! */
948 cl_tail = cl_prev;
949
950 /* Was it the last class in this band? */
951 if (cl == cl_tail) {
952 /* Kill the band! */
953 q->active[prio] = NULL;
954 q->activemask &= ~(1<<prio);
955 if (cl->q->q.qlen)
956 cbq_activate_class(cl);
957 return NULL;
958 }
959
960 q->active[prio] = cl_tail;
961 }
962 if (cl->q->q.qlen)
963 cbq_activate_class(cl);
964
965 cl = cl_prev;
966 }
967
968next_class:
969 cl_prev = cl;
970 cl = cl->next_alive;
971 } while (cl_prev != cl_tail);
972 } while (deficit);
973
974 q->active[prio] = cl_prev;
975
976 return NULL;
977}
978
979static __inline__ struct sk_buff *
980cbq_dequeue_1(struct Qdisc *sch)
981{
982 struct cbq_sched_data *q = qdisc_priv(sch);
983 struct sk_buff *skb;
984 unsigned activemask;
985
986 activemask = q->activemask&0xFF;
987 while (activemask) {
988 int prio = ffz(~activemask);
989 activemask &= ~(1<<prio);
990 skb = cbq_dequeue_prio(sch, prio);
991 if (skb)
992 return skb;
993 }
994 return NULL;
995}
996
997static struct sk_buff *
998cbq_dequeue(struct Qdisc *sch)
999{
1000 struct sk_buff *skb;
1001 struct cbq_sched_data *q = qdisc_priv(sch);
1002 psched_time_t now;
1003 psched_tdiff_t incr;
1004
Patrick McHardy3bebcda2007-03-23 11:29:25 -07001005 now = psched_get_time();
Patrick McHardy8edc0c32007-03-23 11:28:55 -07001006 incr = now - q->now_rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007
1008 if (q->tx_class) {
1009 psched_tdiff_t incr2;
1010 /* Time integrator. We calculate EOS time
1011 by adding expected packet transmission time.
1012 If real time is greater, we warp artificial clock,
1013 so that:
1014
1015 cbq_time = max(real_time, work);
1016 */
1017 incr2 = L2T(&q->link, q->tx_len);
Patrick McHardy7c59e252007-03-23 11:27:45 -07001018 q->now += incr2;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 cbq_update(q);
1020 if ((incr -= incr2) < 0)
1021 incr = 0;
1022 }
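	/* Added note: together with the "q->now += incr" just below, the
	 * block above implements q->now += max(real elapsed time, virtual
	 * service time).  Illustrative numbers: if the last packet's L2T
	 * time is 120us but only 80us of wall clock passed, q->now advances
	 * by 120us (incr is clamped to 0); with 200us of wall clock it
	 * advances by 120 + 80 = 200us.
	 */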
Patrick McHardy7c59e252007-03-23 11:27:45 -07001023 q->now += incr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 q->now_rt = now;
1025
1026 for (;;) {
1027 q->wd_expires = 0;
1028
1029 skb = cbq_dequeue_1(sch);
1030 if (skb) {
1031 sch->q.qlen--;
1032 sch->flags &= ~TCQ_F_THROTTLED;
1033 return skb;
1034 }
1035
		/* All the classes are overlimit.

		   It is possible, if:

		   1. Scheduler is empty.
		   2. Toplevel cutoff inhibited borrowing.
		   3. Root class is overlimit.

		   Reset the 2nd and 3rd conditions and retry.

		   Note that NS and cbq-2.0 are buggy here: peeking
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for the toplevel algorithm.

		   Our version is better, but slower, because it requires
		   two passes; that is unavoidable with top-level sharing.
		 */
1053
1054 if (q->toplevel == TC_CBQ_MAXLEVEL &&
Patrick McHardya0849802007-03-23 11:28:30 -07001055 q->link.undertime == PSCHED_PASTPERFECT)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056 break;
1057
1058 q->toplevel = TC_CBQ_MAXLEVEL;
Patrick McHardya0849802007-03-23 11:28:30 -07001059 q->link.undertime = PSCHED_PASTPERFECT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060 }
1061
1062 /* No packets in scheduler or nobody wants to give them to us :-(
1063 Sigh... start watchdog timer in the last case. */
1064
1065 if (sch->q.qlen) {
1066 sch->qstats.overlimits++;
Patrick McHardy88a99352007-03-16 01:21:11 -07001067 if (q->wd_expires)
1068 qdisc_watchdog_schedule(&q->watchdog,
Patrick McHardybb239ac2007-03-16 12:31:28 -07001069 now + q->wd_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001070 }
1071 return NULL;
1072}
1073
/* CBQ class maintenance routines */
1075
1076static void cbq_adjust_levels(struct cbq_class *this)
1077{
1078 if (this == NULL)
1079 return;
1080
1081 do {
1082 int level = 0;
1083 struct cbq_class *cl;
1084
1085 if ((cl = this->children) != NULL) {
1086 do {
1087 if (cl->level > level)
1088 level = cl->level;
1089 } while ((cl = cl->sibling) != this->children);
1090 }
1091 this->level = level+1;
1092 } while ((this = this->tparent) != NULL);
1093}
1094
1095static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1096{
1097 struct cbq_class *cl;
1098 unsigned h;
1099
1100 if (q->quanta[prio] == 0)
1101 return;
1102
1103 for (h=0; h<16; h++) {
1104 for (cl = q->classes[h]; cl; cl = cl->next) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflows!
			 */
1108 if (cl->priority == prio) {
1109 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1110 q->quanta[prio];
1111 }
1112 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1113 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1114 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1115 }
1116 }
1117 }
1118}
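
/* Added worked example (illustrative numbers): two classes share one
 * priority band with allot = 1500 and weights 1 and 3, so
 * quanta[prio] = 4 and nclasses[prio] = 2.  The loop above then sets
 * quantum = 1*1500*2/4 = 750 for the first class and 3*1500*2/4 = 2250
 * for the second: per-round quanta proportional to weight, averaging to
 * the allot.  The overflow warning is real, since weight is often the
 * class rate in bytes/s, and weight*allot*nclasses can exceed 32 bits.
 */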
1119
1120static void cbq_sync_defmap(struct cbq_class *cl)
1121{
1122 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1123 struct cbq_class *split = cl->split;
1124 unsigned h;
1125 int i;
1126
1127 if (split == NULL)
1128 return;
1129
1130 for (i=0; i<=TC_PRIO_MAX; i++) {
1131 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1132 split->defaults[i] = NULL;
1133 }
1134
1135 for (i=0; i<=TC_PRIO_MAX; i++) {
1136 int level = split->level;
1137
1138 if (split->defaults[i])
1139 continue;
1140
1141 for (h=0; h<16; h++) {
1142 struct cbq_class *c;
1143
1144 for (c = q->classes[h]; c; c = c->next) {
1145 if (c->split == split && c->level < level &&
1146 c->defmap&(1<<i)) {
1147 split->defaults[i] = c;
1148 level = c->level;
1149 }
1150 }
1151 }
1152 }
1153}
1154
1155static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1156{
1157 struct cbq_class *split = NULL;
1158
1159 if (splitid == 0) {
1160 if ((split = cl->split) == NULL)
1161 return;
1162 splitid = split->classid;
1163 }
1164
1165 if (split == NULL || split->classid != splitid) {
1166 for (split = cl->tparent; split; split = split->tparent)
1167 if (split->classid == splitid)
1168 break;
1169 }
1170
1171 if (split == NULL)
1172 return;
1173
1174 if (cl->split != split) {
1175 cl->defmap = 0;
1176 cbq_sync_defmap(cl);
1177 cl->split = split;
1178 cl->defmap = def&mask;
1179 } else
1180 cl->defmap = (cl->defmap&~mask)|(def&mask);
1181
1182 cbq_sync_defmap(cl);
1183}
1184
1185static void cbq_unlink_class(struct cbq_class *this)
1186{
1187 struct cbq_class *cl, **clp;
1188 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1189
1190 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1191 if (cl == this) {
1192 *clp = cl->next;
1193 cl->next = NULL;
1194 break;
1195 }
1196 }
1197
1198 if (this->tparent) {
1199 clp=&this->sibling;
1200 cl = *clp;
1201 do {
1202 if (cl == this) {
1203 *clp = cl->sibling;
1204 break;
1205 }
1206 clp = &cl->sibling;
1207 } while ((cl = *clp) != this->sibling);
1208
1209 if (this->tparent->children == this) {
1210 this->tparent->children = this->sibling;
1211 if (this->sibling == this)
1212 this->tparent->children = NULL;
1213 }
1214 } else {
1215 BUG_TRAP(this->sibling == this);
1216 }
1217}
1218
1219static void cbq_link_class(struct cbq_class *this)
1220{
1221 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1222 unsigned h = cbq_hash(this->classid);
1223 struct cbq_class *parent = this->tparent;
1224
1225 this->sibling = this;
1226 this->next = q->classes[h];
1227 q->classes[h] = this;
1228
1229 if (parent == NULL)
1230 return;
1231
1232 if (parent->children == NULL) {
1233 parent->children = this;
1234 } else {
1235 this->sibling = parent->children->sibling;
1236 parent->children->sibling = this;
1237 }
1238}
1239
1240static unsigned int cbq_drop(struct Qdisc* sch)
1241{
1242 struct cbq_sched_data *q = qdisc_priv(sch);
1243 struct cbq_class *cl, *cl_head;
1244 int prio;
1245 unsigned int len;
1246
1247 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1248 if ((cl_head = q->active[prio]) == NULL)
1249 continue;
1250
1251 cl = cl_head;
1252 do {
1253 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1254 sch->q.qlen--;
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001255 if (!cl->q->q.qlen)
1256 cbq_deactivate_class(cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257 return len;
1258 }
1259 } while ((cl = cl->next_alive) != cl_head);
1260 }
1261 return 0;
1262}
1263
1264static void
1265cbq_reset(struct Qdisc* sch)
1266{
1267 struct cbq_sched_data *q = qdisc_priv(sch);
1268 struct cbq_class *cl;
1269 int prio;
1270 unsigned h;
1271
1272 q->activemask = 0;
1273 q->pmask = 0;
1274 q->tx_class = NULL;
1275 q->tx_borrowed = NULL;
Patrick McHardy88a99352007-03-16 01:21:11 -07001276 qdisc_watchdog_cancel(&q->watchdog);
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001277 hrtimer_cancel(&q->delay_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001278 q->toplevel = TC_CBQ_MAXLEVEL;
Patrick McHardy3bebcda2007-03-23 11:29:25 -07001279 q->now = psched_get_time();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280 q->now_rt = q->now;
1281
1282 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1283 q->active[prio] = NULL;
1284
1285 for (h = 0; h < 16; h++) {
1286 for (cl = q->classes[h]; cl; cl = cl->next) {
1287 qdisc_reset(cl->q);
1288
1289 cl->next_alive = NULL;
Patrick McHardya0849802007-03-23 11:28:30 -07001290 cl->undertime = PSCHED_PASTPERFECT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291 cl->avgidle = cl->maxidle;
1292 cl->deficit = cl->quantum;
1293 cl->cpriority = cl->priority;
1294 }
1295 }
1296 sch->q.qlen = 0;
1297}
1298
1299
1300static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1301{
1302 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1303 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1304 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1305 }
1306 if (lss->change&TCF_CBQ_LSS_EWMA)
1307 cl->ewma_log = lss->ewma_log;
1308 if (lss->change&TCF_CBQ_LSS_AVPKT)
1309 cl->avpkt = lss->avpkt;
1310 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1311 cl->minidle = -(long)lss->minidle;
1312 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1313 cl->maxidle = lss->maxidle;
1314 cl->avgidle = lss->maxidle;
1315 }
1316 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1317 cl->offtime = lss->offtime;
1318 return 0;
1319}
1320
1321static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1322{
1323 q->nclasses[cl->priority]--;
1324 q->quanta[cl->priority] -= cl->weight;
1325 cbq_normalize_quanta(q, cl->priority);
1326}
1327
1328static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1329{
1330 q->nclasses[cl->priority]++;
1331 q->quanta[cl->priority] += cl->weight;
1332 cbq_normalize_quanta(q, cl->priority);
1333}
1334
1335static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1336{
1337 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1338
1339 if (wrr->allot)
1340 cl->allot = wrr->allot;
1341 if (wrr->weight)
1342 cl->weight = wrr->weight;
1343 if (wrr->priority) {
1344 cl->priority = wrr->priority-1;
1345 cl->cpriority = cl->priority;
1346 if (cl->priority >= cl->priority2)
1347 cl->priority2 = TC_CBQ_MAXPRIO-1;
1348 }
1349
1350 cbq_addprio(q, cl);
1351 return 0;
1352}
1353
1354static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1355{
1356 switch (ovl->strategy) {
1357 case TC_CBQ_OVL_CLASSIC:
1358 cl->overlimit = cbq_ovl_classic;
1359 break;
1360 case TC_CBQ_OVL_DELAY:
1361 cl->overlimit = cbq_ovl_delay;
1362 break;
1363 case TC_CBQ_OVL_LOWPRIO:
1364 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1365 ovl->priority2-1 <= cl->priority)
1366 return -EINVAL;
1367 cl->priority2 = ovl->priority2-1;
1368 cl->overlimit = cbq_ovl_lowprio;
1369 break;
1370 case TC_CBQ_OVL_DROP:
1371 cl->overlimit = cbq_ovl_drop;
1372 break;
1373 case TC_CBQ_OVL_RCLASSIC:
1374 cl->overlimit = cbq_ovl_rclassic;
1375 break;
1376 default:
1377 return -EINVAL;
1378 }
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001379 cl->penalty = ovl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 return 0;
1381}
1382
1383#ifdef CONFIG_NET_CLS_POLICE
1384static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1385{
1386 cl->police = p->police;
1387
1388 if (cl->q->handle) {
1389 if (p->police == TC_POLICE_RECLASSIFY)
1390 cl->q->reshape_fail = cbq_reshape_fail;
1391 else
1392 cl->q->reshape_fail = NULL;
1393 }
1394 return 0;
1395}
1396#endif
1397
1398static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1399{
1400 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1401 return 0;
1402}
1403
1404static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1405{
1406 struct cbq_sched_data *q = qdisc_priv(sch);
1407 struct rtattr *tb[TCA_CBQ_MAX];
1408 struct tc_ratespec *r;
1409
1410 if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1411 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1412 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1413 return -EINVAL;
1414
1415 if (tb[TCA_CBQ_LSSOPT-1] &&
1416 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1417 return -EINVAL;
1418
1419 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1420
1421 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1422 return -EINVAL;
1423
1424 q->link.refcnt = 1;
1425 q->link.sibling = &q->link;
1426 q->link.classid = sch->handle;
1427 q->link.qdisc = sch;
Patrick McHardy9f9afec2006-11-29 17:35:18 -08001428 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1429 sch->handle)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430 q->link.q = &noop_qdisc;
1431
1432 q->link.priority = TC_CBQ_MAXPRIO-1;
1433 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1434 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1435 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1436 q->link.overlimit = cbq_ovl_classic;
1437 q->link.allot = psched_mtu(sch->dev);
1438 q->link.quantum = q->link.allot;
1439 q->link.weight = q->link.R_tab->rate.rate;
1440
1441 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1442 q->link.avpkt = q->link.allot/2;
1443 q->link.minidle = -0x7FFFFFFF;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444
Patrick McHardy88a99352007-03-16 01:21:11 -07001445 qdisc_watchdog_init(&q->watchdog, sch);
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001446 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001447 q->delay_timer.function = cbq_undelay;
1448 q->toplevel = TC_CBQ_MAXLEVEL;
Patrick McHardy3bebcda2007-03-23 11:29:25 -07001449 q->now = psched_get_time();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450 q->now_rt = q->now;
1451
1452 cbq_link_class(&q->link);
1453
1454 if (tb[TCA_CBQ_LSSOPT-1])
1455 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1456
1457 cbq_addprio(q, &q->link);
1458 return 0;
1459}
1460
1461static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1462{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001463 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001464
1465 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1466 return skb->len;
1467
1468rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001469 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470 return -1;
1471}
1472
1473static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1474{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001475 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001476 struct tc_cbq_lssopt opt;
1477
1478 opt.flags = 0;
1479 if (cl->borrow == NULL)
1480 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1481 if (cl->share == NULL)
1482 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1483 opt.ewma_log = cl->ewma_log;
1484 opt.level = cl->level;
1485 opt.avpkt = cl->avpkt;
1486 opt.maxidle = cl->maxidle;
1487 opt.minidle = (u32)(-cl->minidle);
1488 opt.offtime = cl->offtime;
1489 opt.change = ~0;
1490 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1491 return skb->len;
1492
1493rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001494 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495 return -1;
1496}
1497
1498static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1499{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001500 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501 struct tc_cbq_wrropt opt;
1502
1503 opt.flags = 0;
1504 opt.allot = cl->allot;
1505 opt.priority = cl->priority+1;
1506 opt.cpriority = cl->cpriority+1;
1507 opt.weight = cl->weight;
1508 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1509 return skb->len;
1510
1511rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001512 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001513 return -1;
1514}
1515
1516static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1517{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001518 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519 struct tc_cbq_ovl opt;
1520
1521 opt.strategy = cl->ovl_strategy;
1522 opt.priority2 = cl->priority2+1;
Patrick McHardy8a470772005-06-28 12:56:45 -07001523 opt.pad = 0;
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001524 opt.penalty = cl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001525 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1526 return skb->len;
1527
1528rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001529 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001530 return -1;
1531}
1532
1533static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1534{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001535 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001536 struct tc_cbq_fopt opt;
1537
1538 if (cl->split || cl->defmap) {
1539 opt.split = cl->split ? cl->split->classid : 0;
1540 opt.defmap = cl->defmap;
1541 opt.defchange = ~0;
1542 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1543 }
1544 return skb->len;
1545
1546rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001547 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001548 return -1;
1549}
1550
1551#ifdef CONFIG_NET_CLS_POLICE
1552static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1553{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001554 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001555 struct tc_cbq_police opt;
1556
1557 if (cl->police) {
1558 opt.police = cl->police;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -07001559 opt.__res1 = 0;
1560 opt.__res2 = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001561 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1562 }
1563 return skb->len;
1564
1565rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001566 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001567 return -1;
1568}
1569#endif
1570
1571static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1572{
1573 if (cbq_dump_lss(skb, cl) < 0 ||
1574 cbq_dump_rate(skb, cl) < 0 ||
1575 cbq_dump_wrr(skb, cl) < 0 ||
1576 cbq_dump_ovl(skb, cl) < 0 ||
1577#ifdef CONFIG_NET_CLS_POLICE
1578 cbq_dump_police(skb, cl) < 0 ||
1579#endif
1580 cbq_dump_fopt(skb, cl) < 0)
1581 return -1;
1582 return 0;
1583}
1584
1585static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1586{
1587 struct cbq_sched_data *q = qdisc_priv(sch);
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001588 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001589 struct rtattr *rta;
1590
1591 rta = (struct rtattr*)b;
1592 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1593 if (cbq_dump_attr(skb, &q->link) < 0)
1594 goto rtattr_failure;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001595 rta->rta_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001596 return skb->len;
1597
1598rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001599 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001600 return -1;
1601}
1602
1603static int
1604cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1605{
1606 struct cbq_sched_data *q = qdisc_priv(sch);
1607
1608 q->link.xstats.avgidle = q->link.avgidle;
1609 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1610}
1611
1612static int
1613cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1614 struct sk_buff *skb, struct tcmsg *tcm)
1615{
1616 struct cbq_class *cl = (struct cbq_class*)arg;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001617 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618 struct rtattr *rta;
1619
1620 if (cl->tparent)
1621 tcm->tcm_parent = cl->tparent->classid;
1622 else
1623 tcm->tcm_parent = TC_H_ROOT;
1624 tcm->tcm_handle = cl->classid;
1625 tcm->tcm_info = cl->q->handle;
1626
1627 rta = (struct rtattr*)b;
1628 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1629 if (cbq_dump_attr(skb, cl) < 0)
1630 goto rtattr_failure;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001631 rta->rta_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001632 return skb->len;
1633
1634rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001635 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636 return -1;
1637}
1638
1639static int
1640cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1641 struct gnet_dump *d)
1642{
1643 struct cbq_sched_data *q = qdisc_priv(sch);
1644 struct cbq_class *cl = (struct cbq_class*)arg;
1645
1646 cl->qstats.qlen = cl->q->q.qlen;
1647 cl->xstats.avgidle = cl->avgidle;
1648 cl->xstats.undertime = 0;
1649
Patrick McHardya0849802007-03-23 11:28:30 -07001650 if (cl->undertime != PSCHED_PASTPERFECT)
Patrick McHardy8edc0c32007-03-23 11:28:55 -07001651 cl->xstats.undertime = cl->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001652
1653 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1656 return -1;
1657
1658 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1659}
1660
1661static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1662 struct Qdisc **old)
1663{
1664 struct cbq_class *cl = (struct cbq_class*)arg;
1665
1666 if (cl) {
1667 if (new == NULL) {
Patrick McHardy9f9afec2006-11-29 17:35:18 -08001668 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1669 cl->classid)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670 return -ENOBUFS;
1671 } else {
1672#ifdef CONFIG_NET_CLS_POLICE
1673 if (cl->police == TC_POLICE_RECLASSIFY)
1674 new->reshape_fail = cbq_reshape_fail;
1675#endif
1676 }
1677 sch_tree_lock(sch);
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001678 *old = xchg(&cl->q, new);
Patrick McHardy5e50da02006-11-29 17:36:20 -08001679 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680 qdisc_reset(*old);
1681 sch_tree_unlock(sch);
1682
1683 return 0;
1684 }
1685 return -ENOENT;
1686}
1687
1688static struct Qdisc *
1689cbq_leaf(struct Qdisc *sch, unsigned long arg)
1690{
1691 struct cbq_class *cl = (struct cbq_class*)arg;
1692
1693 return cl ? cl->q : NULL;
1694}
1695
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001696static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1697{
1698 struct cbq_class *cl = (struct cbq_class *)arg;
1699
1700 if (cl->q->q.qlen == 0)
1701 cbq_deactivate_class(cl);
1702}
1703
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1705{
1706 struct cbq_sched_data *q = qdisc_priv(sch);
1707 struct cbq_class *cl = cbq_class_lookup(q, classid);
1708
1709 if (cl) {
1710 cl->refcnt++;
1711 return (unsigned long)cl;
1712 }
1713 return 0;
1714}
1715
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1717{
1718 struct cbq_sched_data *q = qdisc_priv(sch);
1719
1720 BUG_TRAP(!cl->filters);
1721
Patrick McHardya48b5a62007-03-23 11:29:43 -07001722 tcf_destroy_chain(cl->filter_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723 qdisc_destroy(cl->q);
1724 qdisc_put_rtab(cl->R_tab);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725 gen_kill_estimator(&cl->bstats, &cl->rate_est);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001726 if (cl != &q->link)
1727 kfree(cl);
1728}
1729
1730static void
1731cbq_destroy(struct Qdisc* sch)
1732{
1733 struct cbq_sched_data *q = qdisc_priv(sch);
1734 struct cbq_class *cl;
1735 unsigned h;
1736
1737#ifdef CONFIG_NET_CLS_POLICE
1738 q->rx_class = NULL;
1739#endif
1740 /*
1741 * Filters must be destroyed first because we don't destroy the
1742 * classes from root to leafs which means that filters can still
1743 * be bound to classes which have been destroyed already. --TGR '04
1744 */
Patrick McHardyb00b4bf2007-06-05 16:06:59 -07001745 for (h = 0; h < 16; h++) {
1746 for (cl = q->classes[h]; cl; cl = cl->next) {
Patrick McHardya48b5a62007-03-23 11:29:43 -07001747 tcf_destroy_chain(cl->filter_list);
Patrick McHardyb00b4bf2007-06-05 16:06:59 -07001748 cl->filter_list = NULL;
1749 }
1750 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001751 for (h = 0; h < 16; h++) {
1752 struct cbq_class *next;
1753
1754 for (cl = q->classes[h]; cl; cl = next) {
1755 next = cl->next;
1756 cbq_destroy_class(sch, cl);
1757 }
1758 }
1759}
1760
1761static void cbq_put(struct Qdisc *sch, unsigned long arg)
1762{
1763 struct cbq_class *cl = (struct cbq_class*)arg;
1764
1765 if (--cl->refcnt == 0) {
1766#ifdef CONFIG_NET_CLS_POLICE
1767 struct cbq_sched_data *q = qdisc_priv(sch);
1768
1769 spin_lock_bh(&sch->dev->queue_lock);
1770 if (q->rx_class == cl)
1771 q->rx_class = NULL;
1772 spin_unlock_bh(&sch->dev->queue_lock);
1773#endif
1774
1775 cbq_destroy_class(sch, cl);
1776 }
1777}
1778
1779static int
1780cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1781 unsigned long *arg)
1782{
1783 int err;
1784 struct cbq_sched_data *q = qdisc_priv(sch);
1785 struct cbq_class *cl = (struct cbq_class*)*arg;
1786 struct rtattr *opt = tca[TCA_OPTIONS-1];
1787 struct rtattr *tb[TCA_CBQ_MAX];
1788 struct cbq_class *parent;
1789 struct qdisc_rate_table *rtab = NULL;
1790
1791 if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1792 return -EINVAL;
1793
1794 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1795 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1796 return -EINVAL;
1797
1798 if (tb[TCA_CBQ_FOPT-1] &&
1799 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1800 return -EINVAL;
1801
1802 if (tb[TCA_CBQ_RATE-1] &&
1803 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1804 return -EINVAL;
1805
1806 if (tb[TCA_CBQ_LSSOPT-1] &&
1807 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1808 return -EINVAL;
1809
1810 if (tb[TCA_CBQ_WRROPT-1] &&
1811 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1812 return -EINVAL;
1813
1814#ifdef CONFIG_NET_CLS_POLICE
1815 if (tb[TCA_CBQ_POLICE-1] &&
1816 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1817 return -EINVAL;
1818#endif
1819
	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent && cl->tparent->classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE-1]) {
			rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
			if (rtab == NULL)
				return -EINVAL;
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			rtab = xchg(&cl->R_tab, rtab);
			qdisc_put_rtab(rtab);
		}

		if (tb[TCA_CBQ_LSSOPT-1])
			cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

		if (tb[TCA_CBQ_WRROPT-1]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY-1])
			cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));

#ifdef CONFIG_NET_CLS_POLICE
		if (tb[TCA_CBQ_POLICE-1])
			cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif

		if (tb[TCA_CBQ_FOPT-1])
			cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

		if (tca[TCA_RATE-1])
			gen_replace_estimator(&cl->bstats, &cl->rate_est,
					      &sch->dev->queue_lock,
					      tca[TCA_RATE-1]);
		return 0;
	}

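	/*
	 * No class given: create a new one.  WRR, rate and link-sharing
	 * parameters are mandatory for a fresh class, and a class cannot be
	 * attached at TC_H_ROOT because the root link class already exists.
	 */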
	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    tb[TCA_CBQ_LSSOPT-1] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
	if (rtab == NULL)
		return -EINVAL;

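	/*
	 * Pick a class id: either validate the one supplied by the user (it
	 * must live under this qdisc's major handle and be unused), or
	 * auto-generate a free minor id above 0x8000 via the hgenerator
	 * cursor.
	 */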
	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle,0x8000);

		for (i=0; i<0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

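	/* Resolve the parent; without a parentid the class hangs off the root link class. */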
	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

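	/*
	 * Allocate the class and seed it with defaults: a pfifo child qdisc
	 * (noop if that allocation fails), allot/quantum copied from the
	 * parent, and the weight taken from the configured rate.
	 */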
	err = -ENOBUFS;
	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;
	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
		cl->q = &noop_qdisc;
	cl->classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;

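	/*
	 * Link the class into the hash and the share/borrow hierarchy under
	 * the tree lock, then apply the remaining attributes, falling back
	 * to the root link's ewma_log/maxidle/avpkt when they are left zero.
	 */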
	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
	cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
	if (cl->ewma_log==0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle==0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt==0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY-1])
		cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1])
		cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif
	if (tb[TCA_CBQ_FOPT-1])
		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
	sch_tree_unlock(sch);

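	/* Optionally attach a rate estimator for the new class. */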
	if (tca[TCA_RATE-1])
		gen_new_estimator(&cl->bstats, &cl->rate_est,
				  &sch->dev->queue_lock, tca[TCA_RATE-1]);

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}

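/*
 * Delete a class (Qdisc_class_ops.delete).  Refuses while filters are still
 * bound or child classes exist, purges the class's queue, detaches it from
 * the transmit state and the class tree, and frees it once the last
 * reference is gone.
 */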
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;
	unsigned int qlen;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	qlen = cl->q->q.qlen;
	qdisc_reset(cl->q);
	qdisc_tree_decrease_qlen(cl->q, qlen);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_POLICE
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	if (--cl->refcnt == 0)
		cbq_destroy_class(sch, cl);

	return 0;
}

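/* Return the filter chain anchor for a class; an argument of zero selects the root link class. */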
static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}

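/*
 * Filter (un)binding: a filter may only point to a class strictly below the
 * one it is attached to (p->level <= cl->level is rejected), and the
 * per-class filter count is what cbq_delete() checks before allowing a
 * class to be removed.
 */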
static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class*)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->filters--;
}

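/*
 * Walk all classes in hash order for dumps, honouring the walker's
 * skip/count bookkeeping and stopping early when the callback asks to.
 */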
static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	unsigned h;

	if (arg->stop)
		return;

	for (h = 0; h < 16; h++) {
		struct cbq_class *cl;

		for (cl = q->classes[h]; cl; cl = cl->next) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

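/* Hook the routines above into the generic qdisc and class operation tables. */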
static struct Qdisc_class_ops cbq_class_ops = {
	.graft = cbq_graft,
	.leaf = cbq_leaf,
	.qlen_notify = cbq_qlen_notify,
	.get = cbq_get,
	.put = cbq_put,
	.change = cbq_change_class,
	.delete = cbq_delete,
	.walk = cbq_walk,
	.tcf_chain = cbq_find_tcf,
	.bind_tcf = cbq_bind_filter,
	.unbind_tcf = cbq_unbind_filter,
	.dump = cbq_dump_class,
	.dump_stats = cbq_dump_class_stats,
};

static struct Qdisc_ops cbq_qdisc_ops = {
	.next = NULL,
	.cl_ops = &cbq_class_ops,
	.id = "cbq",
	.priv_size = sizeof(struct cbq_sched_data),
	.enqueue = cbq_enqueue,
	.dequeue = cbq_dequeue,
	.requeue = cbq_requeue,
	.drop = cbq_drop,
	.init = cbq_init,
	.reset = cbq_reset,
	.destroy = cbq_destroy,
	.change = NULL,
	.dump = cbq_dump,
	.dump_stats = cbq_dump_stats,
	.owner = THIS_MODULE,
};

static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}
static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");