/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>


/* Class-Based Queueing (CBQ) algorithm.
   =======================================

   Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		 Management Models for Packet Networks",
		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

	    [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

	    [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		 Parameters", 1996

	    [4] Sally Floyd and Michael Speer, "Experimental Results
		 for Class-Based Queueing", 1998, not published.

   -----------------------------------------------------------------------

   The algorithm skeleton was taken from the NS simulator cbq.cc.
   If someone wants to check this code against the LBL version,
   he should take into account that ONLY the skeleton was borrowed;
   the implementation is different. Particularly:

   --- The WRR algorithm is different. Our version looks more
   reasonable (I hope) and works when quanta are allowed to be
   less than MTU, which is always the case when real-time classes
   have small rates. Note that the statement of [3] is
   incomplete: delay may actually be estimated even if class
   per-round allotment is less than MTU. Namely, if per-round
   allotment is W*r_i, and r_1+...+r_k = r < 1

   delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

   In the worst case we have an IntServ estimate with D = W*r+k*MTU
   and C = MTU*r. The proof (if correct at all) is trivial.


   --- It seems that cbq-2.0 is not very accurate. At least, I cannot
   interpret some places, which look like wrong translations
   from NS. Anyone is advised to find these differences
   and explain to me why I am wrong 8).

   --- Linux has no EOI event, so that we cannot estimate true class
   idle time. The workaround is to consider the next dequeue event
   as a sign that the previous packet is finished. This is wrong because of
   internal device queueing, but on a permanently loaded link it is true.
   Moreover, combined with the clock integrator, this scheme looks
   very close to an ideal solution.  */
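
/* A purely illustrative reading of the delay bound above (the numbers are
   mine, not taken from the references): with B = 10 Mbit/s (1.25 MB/s),
   MTU = 1500 B, k = 2 classes with r_1 = r_2 = 0.25 (so r = 0.5), and
   W = 3000 B, the per-round allotment is W*r_i = 750 B, and

	delay_i <= (ceil(1500/750)*1500 + 1500 + 2*1500)/B
		 = 7500 B / 1.25 MB/s = 6 ms

   i.e. the bound stays finite even though the per-round allotment (750 B)
   is below MTU, which is exactly the point made against [3]. */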

struct cbq_sched_data;


struct cbq_class
{
	struct cbq_class	*next;		/* hash table link */
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	u32			classid;
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_POLICE
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic	bstats;
	struct gnet_stats_queue	qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX+1];
};
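
/* A note on the four parent-like pointers above (my summary, not from the
   original authors): tparent is the configuration-tree parent; borrow is
   tparent unless the class is bounded (may not borrow bandwidth); share is
   tparent unless the class is isolated (does not lend); split is the node
   at which the default-priority map (defmap) is resolved during
   classification. See cbq_set_lss() and cbq_classify() below. */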

struct cbq_sched_data
{
	struct cbq_class	*classes[16];		/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];
	unsigned		quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class	link;

	unsigned		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_POLICE
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;		/* Cached timestamp */
	psched_time_t		now_rt;		/* Cached real time */
	unsigned		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};


#define L2T(cl,len)	((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
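
/* The L2T() ("length to time") macro above maps a packet length to its
   transmission time at this class's rate, using the rate table computed in
   userspace by tc: data[] holds precomputed times for length buckets of
   2^cell_log bytes. E.g. (illustrative numbers) with cell_log = 3, a
   1000-byte packet uses slot 1000 >> 3 = 125. */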


static __inline__ unsigned cbq_hash(u32 h)
{
	h ^= h>>8;
	h ^= h>>4;
	return h&0xF;
}
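
/* The hash above folds the classid into 16 buckets. Worked example (added
   for illustration): classid 0x100 gives 0x100 ^ (0x100>>8) = 0x101, then
   0x101 ^ (0x101>>4) = 0x111, and 0x111 & 0xF = bucket 1. */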

static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct cbq_class *cl;

	for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
		if (cl->classid == classid)
			return cl;
	return NULL;
}

#ifdef CONFIG_NET_CLS_POLICE

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
			return new;

	return NULL;
}

#endif

/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link-sharing rules (f.e. route based) at the root
   of CBQ, so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */
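
/* Illustration (mine, not part of the original comment): if the root
   filters return a split node instead of a leaf, resolution continues
   below that node; a leaf is then chosen either by a classifier attached
   to the split node or, failing that, by defaults[skb->priority &
   TC_PRIO_MAX], with TC_PRIO_BESTEFFORT as the last resort (see the
   fallback path below). */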

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
			goto fallback;

		if ((cl = (void*)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS;
		case TC_ACT_SHOT:
			return NULL;
		}
#elif defined(CONFIG_NET_CLS_POLICE)
		switch (result) {
		case TC_POLICE_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		case TC_POLICE_SHOT:
			return NULL;
		default:
			break;
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If the classifier selected a link-sharing class,
		 *	     apply the agency-specific classifier.
		 *	     Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}

/*
   A packet has just been enqueued to an empty class.
   cbq_activate_class adds it to the tail of the active class list
   of its priority band.
 */

static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}
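
/* Invariant worth noting (my reading of the code, not an original comment):
   q->active[prio] always points at the *tail* of a circular next_alive
   list, so tail->next_alive is the head, i.e. the next class that the
   round-robin in cbq_dequeue_prio() will serve. Activation therefore
   inserts the new class just behind the current round position. */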

/*
   Unlink class from active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during the round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}

static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
		psched_time_t now;
		psched_tdiff_t incr;

		now = psched_get_time();
		incr = now - q->now_rt;
		now = q->now + incr;

		do {
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
	}
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int len = skb->len;
	int ret;
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret == NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_POLICE
	cl->q->__parent = sch;
#endif
	if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		sch->bstats.packets++;
		sch->bstats.bytes+=len;
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	sch->qstats.drops++;
	cbq_mark_toplevel(q, cl);
	cl->qstats.drops++;
	return ret;
}

static int
cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	int ret;

	if ((cl = q->tx_class) == NULL) {
		kfree_skb(skb);
		sch->qstats.drops++;
		return NET_XMIT_CN;
	}
	q->tx_class = NULL;

	cbq_mark_toplevel(q, cl);

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = cl;
	cl->q->__parent = sch;
#endif
	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
		sch->q.qlen++;
		sch->qstats.requeues++;
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return 0;
	}
	sch->qstats.drops++;
	cl->qstats.drops++;
	return ret;
}

/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		   Class goes to sleep, so that it will have no
		   chance to work off its avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a bug in this place; apparently
		   they forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	   real available rate, rather than leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}

/* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
   when they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		cl->undertime = q->now + delay;

		if (delay > 0) {
			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ktime_set(0, 0);
			expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
			if (hrtimer_try_to_cancel(&q->delay_timer) &&
			    ktime_to_ns(ktime_sub(q->delay_timer.expires,
						  expires)) > 0)
				q->delay_timer.expires = expires;
			hrtimer_restart(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = q->now + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}

static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}

static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned pmask;

	now = psched_get_time();

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
	return HRTIMER_NORESTART;
}


#ifdef CONFIG_NET_CLS_POLICE

static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	int len = skb->len;
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		if (cl->q->enqueue(skb, cl->q) == 0) {
			sch->q.qlen++;
			sch->bstats.packets++;
			sch->bstats.bytes+=len;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
#endif

/*
   This is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and is not regulated. It is not part of the
   original CBQ description, but looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
*/

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed=borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}

static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		   (now - last) is total time between packet right edges.
		   (last_pktlen/rate) is "virtual" busy time, so that

			 idle = (now - last) - last_pktlen/rate
		 */

		idle = q->now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
		   cl->avgidle == true_avgidle/W,
		   hence:
		 */
			avgidle += idle - (avgidle>>cl->ewma_log);
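
			/* Sanity check of the scaling (added note, not in
			   the original): with s = true_avgidle/W the update
			   s' = s + idle - W*s expands to
			   ((1-W)*true_avgidle + W*idle)/W, i.e. exactly the
			   EWMA above divided by W, so the shift by ewma_log
			   implements the multiply by W with no division. */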
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate expected time, when this class
			   will be allowed to send.
			   It will occur, when:
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle)
			   or
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			   That is not all.
			   To maintain the rate allocated to the class,
			   we add to undertime virtual clock,
			   necessary to complete transmitted packet.
			   (len/phys_bandwidth has been already passed
			   to the moment of cbq_update)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = q->now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}

static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* It is a very suspicious place. Now the overlimit
		   action is generated for non-bounded classes
		   only if the link is completely congested.
		   Though it agrees with the ancestor-only paradigm,
		   it looks very stupid. Particularly,
		   it means that this chunk of code will either
		   never be called or result in strong amplification
		   of burstiness. Dangerous, silly, and, however,
		   no other solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}

static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				   this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			   It could occur even if cl->q->q.qlen != 0
			   f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= skb->len;
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += skb->len;
				cl->xstats.borrows += skb->len;
#endif
			}
			q->tx_len = skb->len;

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				   Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}
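
/* Side note (mine): the deficit handling above is essentially deficit
   round robin: each class spends cl->deficit, dequeues while it is
   positive, and gets cl->quantum added back when exhausted, layered on
   top of the CBQ borrow/limit checks in cbq_under_limit(). */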

static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask&0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}

static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	now = psched_get_time();
	incr = now - q->now_rt;

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		   by adding expected packet transmission time.
		   If real time is greater, we warp artificial clock,
		   so that:

		   cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		q->now += incr2;
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	q->now += incr;
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.

		   It is possible, if:

		   1. Scheduler is empty.
		   2. Toplevel cutoff inhibited borrowing.
		   3. Root class is overlimit.

		   Reset the 2nd and 3rd conditions and retry.

		   Note that NS and cbq-2.0 are buggy: peeking
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for the toplevel algorithm.

		   Our version is better, but slower, because it requires
		   two passes, but that is unavoidable with top-level sharing.
		 */

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	   Sigh... start watchdog timer in the last case. */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}

/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level+1;
	} while ((this = this->tparent) != NULL);
}

static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	unsigned h;

	if (q->quanta[prio] == 0)
		return;

	for (h=0; h<16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflows!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
				cl->quantum = cl->qdisc->dev->mtu/2 + 1;
			}
		}
	}
}
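
/* Worked example for the normalization above (illustrative numbers, not
   from the original sources): two classes at the same prio, both with
   allot 1514 and weights 1 and 3, give quanta[prio] = 4 and
   nclasses[prio] = 2, so
	quantum_1 = (1*1514*2)/4 = 757
	quantum_2 = (3*1514*2)/4 = 2271
   Per-round service stays proportional to weight while the mean quantum
   stays at allot; the weight*allot product is also where the overflow
   warned about above can bite. */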

static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned h;
	int i;

	if (split == NULL)
		return;

	for (i=0; i<=TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
			split->defaults[i] = NULL;
	}

	for (i=0; i<=TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h=0; h<16; h++) {
			struct cbq_class *c;

			for (c = q->classes[h]; c; c = c->next) {
				if (c->split == split && c->level < level &&
				    c->defmap&(1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}

static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		if ((split = cl->split) == NULL)
			return;
		splitid = split->classid;
	}

	if (split == NULL || split->classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def&mask;
	} else
		cl->defmap = (cl->defmap&~mask)|(def&mask);

	cbq_sync_defmap(cl);
}

static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
		if (cl == this) {
			*clp = cl->next;
			cl->next = NULL;
			break;
		}
	}

	if (this->tparent) {
		clp=&this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		BUG_TRAP(this->sibling == this);
	}
}

static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	unsigned h = cbq_hash(this->classid);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	this->next = q->classes[h];
	q->classes[h] = this;

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}

static unsigned int cbq_drop(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				if (!cl->q->q.qlen)
					cbq_deactivate_class(cl);
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}

static void
cbq_reset(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	int prio;
	unsigned h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	qdisc_watchdog_cancel(&q->watchdog);
	hrtimer_cancel(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < 16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			cl->undertime = PSCHED_PASTPERFECT;
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}


static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change&TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change&TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change&TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change&TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change&TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}

static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority-1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO-1;
	}

	cbq_addprio(q, cl);
	return 0;
}

static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2-1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2-1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = ovl->penalty;
	return 0;
}

#ifdef CONFIG_NET_CLS_POLICE
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}

static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct rtattr *tb[TCA_CBQ_MAX];
	struct tc_ratespec *r;

	if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
	    tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
		return -EINVAL;

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
		return -EINVAL;

	r = RTA_DATA(tb[TCA_CBQ_RATE-1]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
		return -EINVAL;

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.classid = sch->handle;
	q->link.qdisc = sch;
	if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
					    sch->handle)))
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO-1;
	q->link.priority2 = TC_CBQ_MAXPRIO-1;
	q->link.cpriority = TC_CBQ_MAXPRIO-1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(sch->dev);
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;

	qdisc_watchdog_init(&q->watchdog, sch);
	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT-1])
		cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

	cbq_addprio(q, &q->link);
	return 0;
}
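
/* For orientation, a typical userspace setup that exercises cbq_init()
   and cbq_change_class() might look like the following (sketch only;
   the exact tc invocation and all values are illustrative, not taken
   from this file):

	tc qdisc add dev eth0 root handle 1: cbq \
		bandwidth 10Mbit avpkt 1000 cell 8
	tc class add dev eth0 parent 1: classid 1:1 cbq \
		bandwidth 10Mbit rate 6Mbit allot 1514 prio 5 \
		avpkt 1000 bounded

   "bounded" clears the borrow pointer (TCF_CBQ_LSS_BOUNDED) and
   "isolated" would clear share (TCF_CBQ_LSS_ISOLATED), matching
   cbq_set_lss() above. */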

static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);

	RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_wrropt opt;

	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority+1;
	opt.cpriority = cl->cpriority+1;
	opt.weight = cl->weight;
	RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2+1;
	opt.pad = 0;
	opt.penalty = cl->penalty;
	RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}

#ifdef CONFIG_NET_CLS_POLICE
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		opt.__res1 = 0;
		opt.__res2 = 0;
		RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif

static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_POLICE
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}

static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb_tail_pointer(skb);
	struct rtattr *rta;

	rta = (struct rtattr*)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto rtattr_failure;
	rta->rta_len = skb_tail_pointer(skb) - b;
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1586
1587static int
1588cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1589{
1590 struct cbq_sched_data *q = qdisc_priv(sch);
1591
1592 q->link.xstats.avgidle = q->link.avgidle;
1593 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1594}
1595
1596static int
1597cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1598 struct sk_buff *skb, struct tcmsg *tcm)
1599{
1600 struct cbq_class *cl = (struct cbq_class*)arg;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001601 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001602 struct rtattr *rta;
1603
1604 if (cl->tparent)
1605 tcm->tcm_parent = cl->tparent->classid;
1606 else
1607 tcm->tcm_parent = TC_H_ROOT;
1608 tcm->tcm_handle = cl->classid;
1609 tcm->tcm_info = cl->q->handle;
1610
1611 rta = (struct rtattr*)b;
1612 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1613 if (cbq_dump_attr(skb, cl) < 0)
1614 goto rtattr_failure;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001615 rta->rta_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001616 return skb->len;
1617
1618rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001619 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620 return -1;
1621}
1622
1623static int
1624cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1625 struct gnet_dump *d)
1626{
1627 struct cbq_sched_data *q = qdisc_priv(sch);
1628 struct cbq_class *cl = (struct cbq_class*)arg;
1629
1630 cl->qstats.qlen = cl->q->q.qlen;
1631 cl->xstats.avgidle = cl->avgidle;
1632 cl->xstats.undertime = 0;
1633
Patrick McHardya0849802007-03-23 11:28:30 -07001634 if (cl->undertime != PSCHED_PASTPERFECT)
Patrick McHardy8edc0c32007-03-23 11:28:55 -07001635 cl->xstats.undertime = cl->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636
1637 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001638 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001639 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1640 return -1;
1641
1642 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1643}
1644
1645static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1646 struct Qdisc **old)
1647{
1648 struct cbq_class *cl = (struct cbq_class*)arg;
1649
1650 if (cl) {
1651 if (new == NULL) {
Patrick McHardy9f9afec2006-11-29 17:35:18 -08001652 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1653 cl->classid)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654 return -ENOBUFS;
1655 } else {
1656#ifdef CONFIG_NET_CLS_POLICE
1657 if (cl->police == TC_POLICE_RECLASSIFY)
1658 new->reshape_fail = cbq_reshape_fail;
1659#endif
1660 }
1661 sch_tree_lock(sch);
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001662 *old = xchg(&cl->q, new);
Patrick McHardy5e50da02006-11-29 17:36:20 -08001663 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664 qdisc_reset(*old);
1665 sch_tree_unlock(sch);
1666
1667 return 0;
1668 }
1669 return -ENOENT;
1670}
1671
1672static struct Qdisc *
1673cbq_leaf(struct Qdisc *sch, unsigned long arg)
1674{
1675 struct cbq_class *cl = (struct cbq_class*)arg;
1676
1677 return cl ? cl->q : NULL;
1678}
1679
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001680static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1681{
1682 struct cbq_class *cl = (struct cbq_class *)arg;
1683
1684 if (cl->q->q.qlen == 0)
1685 cbq_deactivate_class(cl);
1686}
1687
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1689{
1690 struct cbq_sched_data *q = qdisc_priv(sch);
1691 struct cbq_class *cl = cbq_class_lookup(q, classid);
1692
1693 if (cl) {
1694 cl->refcnt++;
1695 return (unsigned long)cl;
1696 }
1697 return 0;
1698}
1699
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1701{
1702 struct cbq_sched_data *q = qdisc_priv(sch);
1703
1704 BUG_TRAP(!cl->filters);
1705
Patrick McHardya48b5a62007-03-23 11:29:43 -07001706 tcf_destroy_chain(cl->filter_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707 qdisc_destroy(cl->q);
1708 qdisc_put_rtab(cl->R_tab);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709 gen_kill_estimator(&cl->bstats, &cl->rate_est);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 if (cl != &q->link)
1711 kfree(cl);
1712}
1713
1714static void
1715cbq_destroy(struct Qdisc* sch)
1716{
1717 struct cbq_sched_data *q = qdisc_priv(sch);
1718 struct cbq_class *cl;
1719 unsigned h;
1720
1721#ifdef CONFIG_NET_CLS_POLICE
1722 q->rx_class = NULL;
1723#endif
1724 /*
1725 * Filters must be destroyed first because we don't destroy the
1726 * classes from root to leafs which means that filters can still
1727 * be bound to classes which have been destroyed already. --TGR '04
1728 */
Patrick McHardyb00b4bf2007-06-05 16:06:59 -07001729 for (h = 0; h < 16; h++) {
1730 for (cl = q->classes[h]; cl; cl = cl->next) {
Patrick McHardya48b5a62007-03-23 11:29:43 -07001731 tcf_destroy_chain(cl->filter_list);
Patrick McHardyb00b4bf2007-06-05 16:06:59 -07001732 cl->filter_list = NULL;
1733 }
1734 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001735 for (h = 0; h < 16; h++) {
1736 struct cbq_class *next;
1737
1738 for (cl = q->classes[h]; cl; cl = next) {
1739 next = cl->next;
1740 cbq_destroy_class(sch, cl);
1741 }
1742 }
1743}
1744
1745static void cbq_put(struct Qdisc *sch, unsigned long arg)
1746{
1747 struct cbq_class *cl = (struct cbq_class*)arg;
1748
1749 if (--cl->refcnt == 0) {
1750#ifdef CONFIG_NET_CLS_POLICE
1751 struct cbq_sched_data *q = qdisc_priv(sch);
1752
1753 spin_lock_bh(&sch->dev->queue_lock);
1754 if (q->rx_class == cl)
1755 q->rx_class = NULL;
1756 spin_unlock_bh(&sch->dev->queue_lock);
1757#endif
1758
1759 cbq_destroy_class(sch, cl);
1760 }
1761}
1762
1763static int
1764cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1765 unsigned long *arg)
1766{
1767 int err;
1768 struct cbq_sched_data *q = qdisc_priv(sch);
1769 struct cbq_class *cl = (struct cbq_class*)*arg;
1770 struct rtattr *opt = tca[TCA_OPTIONS-1];
1771 struct rtattr *tb[TCA_CBQ_MAX];
1772 struct cbq_class *parent;
1773 struct qdisc_rate_table *rtab = NULL;
1774
1775 if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1776 return -EINVAL;
1777
1778 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1779 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1780 return -EINVAL;
1781
1782 if (tb[TCA_CBQ_FOPT-1] &&
1783 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1784 return -EINVAL;
1785
1786 if (tb[TCA_CBQ_RATE-1] &&
1787 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1788 return -EINVAL;
1789
1790 if (tb[TCA_CBQ_LSSOPT-1] &&
1791 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1792 return -EINVAL;
1793
1794 if (tb[TCA_CBQ_WRROPT-1] &&
1795 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1796 return -EINVAL;
1797
1798#ifdef CONFIG_NET_CLS_POLICE
1799 if (tb[TCA_CBQ_POLICE-1] &&
1800 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1801 return -EINVAL;
1802#endif
1803
	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent && cl->tparent->classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE-1]) {
			rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
			if (rtab == NULL)
				return -EINVAL;
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			rtab = xchg(&cl->R_tab, rtab);
			qdisc_put_rtab(rtab);
		}

		if (tb[TCA_CBQ_LSSOPT-1])
			cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

		if (tb[TCA_CBQ_WRROPT-1]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY-1])
			cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));

#ifdef CONFIG_NET_CLS_POLICE
		if (tb[TCA_CBQ_POLICE-1])
			cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif

		if (tb[TCA_CBQ_FOPT-1])
			cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

		if (tca[TCA_RATE-1])
			gen_replace_estimator(&cl->bstats, &cl->rate_est,
					      &sch->dev->queue_lock,
					      tca[TCA_RATE-1]);
		return 0;
	}

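	/* No existing class: create one. Rate, WRR and LSS options are mandatory here. */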
	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    tb[TCA_CBQ_LSSOPT-1] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid ^ sch->handle) || cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle, 0x8000);

		for (i = 0; i < 0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;
	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
		cl->q = &noop_qdisc;
	cl->classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
	cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
	if (cl->ewma_log == 0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle == 0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt == 0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY-1])
		cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1])
		cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif
	if (tb[TCA_CBQ_FOPT-1])
		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
	sch_tree_unlock(sch);

	if (tca[TCA_RATE-1])
		gen_new_estimator(&cl->bstats, &cl->rate_est,
				  &sch->dev->queue_lock, tca[TCA_RATE-1]);

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}

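/*
 * Delete a class: refused while filters or child classes still reference
 * it, and always refused for the built-in root (link) class. The class is
 * unlinked under the tree lock and freed once the last reference is put.
 */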
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;
	unsigned int qlen;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	qlen = cl->q->q.qlen;
	qdisc_reset(cl->q);
	qdisc_tree_decrease_qlen(cl->q, qlen);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_POLICE
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	if (--cl->refcnt == 0)
		cbq_destroy_class(sch, cl);

	return 0;
}

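/* Return the anchor of a class's filter list; a NULL arg selects the root class. */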
static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}

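/*
 * Bind a filter to a class. The bind is refused (0 returned) unless the
 * nominal parent sits strictly above the target class in the hierarchy.
 */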
static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class *)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}

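/* Undo the reference taken in cbq_bind_filter(). */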
static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	cl->filters--;
}

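/*
 * Walk all classes in hash order, honouring the walker's skip/count/stop
 * protocol used by the class dump machinery.
 */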
static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	unsigned h;

	if (arg->stop)
		return;

	for (h = 0; h < 16; h++) {
		struct cbq_class *cl;

		for (cl = q->classes[h]; cl; cl = cl->next) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

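/* Class-level operations exported to the tc class and filter interfaces. */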
static struct Qdisc_class_ops cbq_class_ops = {
	.graft		=	cbq_graft,
	.leaf		=	cbq_leaf,
	.qlen_notify	=	cbq_qlen_notify,
	.get		=	cbq_get,
	.put		=	cbq_put,
	.change		=	cbq_change_class,
	.delete		=	cbq_delete,
	.walk		=	cbq_walk,
	.tcf_chain	=	cbq_find_tcf,
	.bind_tcf	=	cbq_bind_filter,
	.unbind_tcf	=	cbq_unbind_filter,
	.dump		=	cbq_dump_class,
	.dump_stats	=	cbq_dump_class_stats,
};

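/*
 * Qdisc-level operations. priv_size tells the core how much private state
 * (struct cbq_sched_data) to allocate alongside each qdisc instance.
 */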
static struct Qdisc_ops cbq_qdisc_ops = {
	.next		=	NULL,
	.cl_ops		=	&cbq_class_ops,
	.id		=	"cbq",
	.priv_size	=	sizeof(struct cbq_sched_data),
	.enqueue	=	cbq_enqueue,
	.dequeue	=	cbq_dequeue,
	.requeue	=	cbq_requeue,
	.drop		=	cbq_drop,
	.init		=	cbq_init,
	.reset		=	cbq_reset,
	.destroy	=	cbq_destroy,
	.change		=	NULL,
	.dump		=	cbq_dump,
	.dump_stats	=	cbq_dump_stats,
	.owner		=	THIS_MODULE,
};

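/* Module hooks: register the qdisc on load, unregister it on unload. */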
static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}

static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}

module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");