blob: f30e3f7ad885b50238b3682875558e6a8f9c6358 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020031#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110032#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070033#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070034#include <net/pkt_sched.h>
35
Linus Torvalds1da177e2005-04-16 15:20:36 -070036static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
37 struct Qdisc *old, struct Qdisc *new);
38static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
39 struct Qdisc *q, unsigned long cl, int event);
40
41/*
42
43 Short review.
44 -------------
45
46 This file consists of two interrelated parts:
47
48 1. queueing disciplines manager frontend.
49 2. traffic classes manager frontend.
50
51 Generally, queueing discipline ("qdisc") is a black box,
52 which is able to enqueue packets and to dequeue them (when
53 device is ready to send something) in order and at times
54 determined by algorithm hidden in it.
55
56 qdisc's are divided to two categories:
57 - "queues", which have no internal structure visible from outside.
58 - "schedulers", which split all the packets to "traffic classes",
59 using "packet classifiers" (look at cls_api.c)
60
61 In turn, classes may have child qdiscs (as rule, queues)
62 attached to them etc. etc. etc.
63
64 The goal of the routines in this file is to translate
65 information supplied by user in the form of handles
66 to more intelligible for kernel form, to make some sanity
67 checks and part of work, which is common to all qdiscs
68 and to provide rtnetlink notifications.
69
70 All real intelligent work is done inside qdisc modules.
71
72
73
74 Every discipline has two major routines: enqueue and dequeue.
75
76 ---dequeue
77
78 dequeue usually returns a skb to send. It is allowed to return NULL,
79 but it does not mean that queue is empty, it just means that
80 discipline does not want to send anything this time.
81 Queue is really empty if q->q.qlen == 0.
82 For complicated disciplines with multiple queues q->q is not
83 real packet queue, but however q->q.qlen must be valid.
84
85 ---enqueue
86
   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
90 NET_XMIT_DROP - this packet dropped
91 Expected action: do not backoff, but wait until queue will clear.
92 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
93 Expected action: backoff or ignore
94 NET_XMIT_POLICED - dropped by police.
95 Expected action: backoff or error to real-time apps.
96
97 Auxiliary routines:
98
99 ---requeue
100
101 requeues once dequeued packet. It is used for non-standard or
102 just buggy devices, which can defer output even if dev->tbusy=0.
103
104 ---reset
105
106 returns qdisc to initial state: purge all buffers, clear all
107 timers, counters (except for statistics) etc.
108
109 ---init
110
111 initializes newly created qdisc.
112
113 ---destroy
114
115 destroys resources allocated by init and during lifetime of qdisc.
116
117 ---change
118
119 changes qdisc parameters.
120 */
121
122/* Protects list of registered TC modules. It is pure SMP lock. */
123static DEFINE_RWLOCK(qdisc_mod_lock);
124
125
126/************************************************
127 * Queueing disciplines manipulation. *
128 ************************************************/
129
130
131/* The list of all installed queueing disciplines. */
132
133static struct Qdisc_ops *qdisc_base;
134
/* Register/unregister queueing discipline */
136
137int register_qdisc(struct Qdisc_ops *qops)
138{
139 struct Qdisc_ops *q, **qp;
140 int rc = -EEXIST;
141
142 write_lock(&qdisc_mod_lock);
143 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
144 if (!strcmp(qops->id, q->id))
145 goto out;
146
147 if (qops->enqueue == NULL)
148 qops->enqueue = noop_qdisc_ops.enqueue;
149 if (qops->requeue == NULL)
150 qops->requeue = noop_qdisc_ops.requeue;
151 if (qops->dequeue == NULL)
152 qops->dequeue = noop_qdisc_ops.dequeue;
153
154 qops->next = NULL;
155 *qp = qops;
156 rc = 0;
157out:
158 write_unlock(&qdisc_mod_lock);
159 return rc;
160}
161
162int unregister_qdisc(struct Qdisc_ops *qops)
163{
164 struct Qdisc_ops *q, **qp;
165 int err = -ENOENT;
166
167 write_lock(&qdisc_mod_lock);
168 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
169 if (q == qops)
170 break;
171 if (q) {
172 *qp = q->next;
173 q->next = NULL;
174 err = 0;
175 }
176 write_unlock(&qdisc_mod_lock);
177 return err;
178}
179
180/* We know handle. Find qdisc among all qdisc's attached to device
181 (root qdisc, all its children, children of children etc.)
182 */
183
Patrick McHardy0463d4a2007-04-16 17:02:10 -0700184struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
Patrick McHardy43effa12006-11-29 17:35:48 -0800185{
186 struct Qdisc *q;
187
188 list_for_each_entry(q, &dev->qdisc_list, list) {
189 if (q->handle == handle)
190 return q;
191 }
192 return NULL;
193}
194
Linus Torvalds1da177e2005-04-16 15:20:36 -0700195static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
196{
197 unsigned long cl;
198 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800199 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200
201 if (cops == NULL)
202 return NULL;
203 cl = cops->get(p, classid);
204
205 if (cl == 0)
206 return NULL;
207 leaf = cops->leaf(p, cl);
208 cops->put(p, cl);
209 return leaf;
210}
211
212/* Find queueing discipline by name */
213
214static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
215{
216 struct Qdisc_ops *q = NULL;
217
218 if (kind) {
219 read_lock(&qdisc_mod_lock);
220 for (q = qdisc_base; q; q = q->next) {
221 if (rtattr_strcmp(kind, q->id) == 0) {
222 if (!try_module_get(q->owner))
223 q = NULL;
224 break;
225 }
226 }
227 read_unlock(&qdisc_mod_lock);
228 }
229 return q;
230}
231
232static struct qdisc_rate_table *qdisc_rtab_list;
233
234struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
235{
236 struct qdisc_rate_table *rtab;
237
238 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
239 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
240 rtab->refcnt++;
241 return rtab;
242 }
243 }
244
245 if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)
246 return NULL;
247
248 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
249 if (rtab) {
250 rtab->rate = *r;
251 rtab->refcnt = 1;
252 memcpy(rtab->data, RTA_DATA(tab), 1024);
253 rtab->next = qdisc_rtab_list;
254 qdisc_rtab_list = rtab;
255 }
256 return rtab;
257}
258
259void qdisc_put_rtab(struct qdisc_rate_table *tab)
260{
261 struct qdisc_rate_table *rtab, **rtabp;
262
263 if (!tab || --tab->refcnt)
264 return;
265
266 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
267 if (rtab == tab) {
268 *rtabp = rtab->next;
269 kfree(rtab);
270 return;
271 }
272 }
273}
274
Patrick McHardy41794772007-03-16 01:19:15 -0700275static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
276{
277 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
278 timer);
Stephen Hemminger19365022007-03-22 12:18:35 -0700279 struct net_device *dev = wd->qdisc->dev;
Patrick McHardy41794772007-03-16 01:19:15 -0700280
281 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
Stephen Hemminger11274e52007-03-22 12:17:42 -0700282 smp_wmb();
Patrick McHardy0621ed22007-07-14 20:49:26 -0700283 netif_schedule(dev);
Stephen Hemminger19365022007-03-22 12:18:35 -0700284
Patrick McHardy41794772007-03-16 01:19:15 -0700285 return HRTIMER_NORESTART;
286}
287
288void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
289{
290 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
291 wd->timer.function = qdisc_watchdog;
292 wd->qdisc = qdisc;
293}
294EXPORT_SYMBOL(qdisc_watchdog_init);
295
296void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
297{
298 ktime_t time;
299
300 wd->qdisc->flags |= TCQ_F_THROTTLED;
301 time = ktime_set(0, 0);
302 time = ktime_add_ns(time, PSCHED_US2NS(expires));
303 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
304}
305EXPORT_SYMBOL(qdisc_watchdog_schedule);
306
307void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
308{
309 hrtimer_cancel(&wd->timer);
310 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
311}
312EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313
314/* Allocate an unique handle from space managed by kernel */
315
316static u32 qdisc_alloc_handle(struct net_device *dev)
317{
318 int i = 0x10000;
319 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
320
321 do {
322 autohandle += TC_H_MAKE(0x10000U, 0);
323 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
324 autohandle = TC_H_MAKE(0x80000000U, 0);
325 } while (qdisc_lookup(dev, autohandle) && --i > 0);
326
327 return i>0 ? autohandle : 0;
328}
329
330/* Attach toplevel qdisc to device dev */
331
332static struct Qdisc *
333dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
334{
335 struct Qdisc *oqdisc;
336
337 if (dev->flags & IFF_UP)
338 dev_deactivate(dev);
339
340 qdisc_lock_tree(dev);
341 if (qdisc && qdisc->flags&TCQ_F_INGRESS) {
342 oqdisc = dev->qdisc_ingress;
343 /* Prune old scheduler */
344 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
345 /* delete */
346 qdisc_reset(oqdisc);
347 dev->qdisc_ingress = NULL;
348 } else { /* new */
349 dev->qdisc_ingress = qdisc;
350 }
351
352 } else {
353
354 oqdisc = dev->qdisc_sleeping;
355
356 /* Prune old scheduler */
357 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
358 qdisc_reset(oqdisc);
359
360 /* ... and graft new one */
361 if (qdisc == NULL)
362 qdisc = &noop_qdisc;
363 dev->qdisc_sleeping = qdisc;
364 dev->qdisc = &noop_qdisc;
365 }
366
367 qdisc_unlock_tree(dev);
368
369 if (dev->flags & IFF_UP)
370 dev_activate(dev);
371
372 return oqdisc;
373}
374
Patrick McHardy43effa12006-11-29 17:35:48 -0800375void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
376{
Eric Dumazet20fea082007-11-14 01:44:41 -0800377 const struct Qdisc_class_ops *cops;
Patrick McHardy43effa12006-11-29 17:35:48 -0800378 unsigned long cl;
379 u32 parentid;
380
381 if (n == 0)
382 return;
383 while ((parentid = sch->parent)) {
Patrick McHardy0463d4a2007-04-16 17:02:10 -0700384 sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700385 if (sch == NULL) {
386 WARN_ON(parentid != TC_H_ROOT);
387 return;
388 }
Patrick McHardy43effa12006-11-29 17:35:48 -0800389 cops = sch->ops->cl_ops;
390 if (cops->qlen_notify) {
391 cl = cops->get(sch, parentid);
392 cops->qlen_notify(sch, cl);
393 cops->put(sch, cl);
394 }
395 sch->q.qlen -= n;
396 }
397}
398EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399
400/* Graft qdisc "new" to class "classid" of qdisc "parent" or
401 to device "dev".
402
403 Old qdisc is not destroyed but returned in *old.
404 */
405
406static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
407 u32 classid,
408 struct Qdisc *new, struct Qdisc **old)
409{
410 int err = 0;
411 struct Qdisc *q = *old;
412
413
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900414 if (parent == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415 if (q && q->flags&TCQ_F_INGRESS) {
416 *old = dev_graft_qdisc(dev, q);
417 } else {
418 *old = dev_graft_qdisc(dev, new);
419 }
420 } else {
Eric Dumazet20fea082007-11-14 01:44:41 -0800421 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422
423 err = -EINVAL;
424
425 if (cops) {
426 unsigned long cl = cops->get(parent, classid);
427 if (cl) {
428 err = cops->graft(parent, cl, new, old);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429 cops->put(parent, cl);
430 }
431 }
432 }
433 return err;
434}
435
436/*
437 Allocate and initialize new qdisc.
438
439 Parameters are passed via opt.
440 */
441
442static struct Qdisc *
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700443qdisc_create(struct net_device *dev, u32 parent, u32 handle,
444 struct rtattr **tca, int *errp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445{
446 int err;
447 struct rtattr *kind = tca[TCA_KIND-1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 struct Qdisc *sch;
449 struct Qdisc_ops *ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450
451 ops = qdisc_lookup_ops(kind);
452#ifdef CONFIG_KMOD
453 if (ops == NULL && kind != NULL) {
454 char name[IFNAMSIZ];
455 if (rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
456 /* We dropped the RTNL semaphore in order to
457 * perform the module load. So, even if we
458 * succeeded in loading the module we have to
459 * tell the caller to replay the request. We
460 * indicate this using -EAGAIN.
461 * We replay the request because the device may
462 * go away in the mean time.
463 */
464 rtnl_unlock();
465 request_module("sch_%s", name);
466 rtnl_lock();
467 ops = qdisc_lookup_ops(kind);
468 if (ops != NULL) {
469 /* We will try again qdisc_lookup_ops,
470 * so don't keep a reference.
471 */
472 module_put(ops->owner);
473 err = -EAGAIN;
474 goto err_out;
475 }
476 }
477 }
478#endif
479
Jamal Hadi Salimb9e2cc02006-08-03 16:36:51 -0700480 err = -ENOENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481 if (ops == NULL)
482 goto err_out;
483
Thomas Graf3d54b822005-07-05 14:15:09 -0700484 sch = qdisc_alloc(dev, ops);
485 if (IS_ERR(sch)) {
486 err = PTR_ERR(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 goto err_out2;
Thomas Graf3d54b822005-07-05 14:15:09 -0700488 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700489
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700490 sch->parent = parent;
491
Thomas Graf3d54b822005-07-05 14:15:09 -0700492 if (handle == TC_H_INGRESS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493 sch->flags |= TCQ_F_INGRESS;
Patrick McHardyfd44de72007-04-16 17:07:08 -0700494 sch->stats_lock = &dev->ingress_lock;
Thomas Graf3d54b822005-07-05 14:15:09 -0700495 handle = TC_H_MAKE(TC_H_INGRESS, 0);
Patrick McHardyfd44de72007-04-16 17:07:08 -0700496 } else {
497 sch->stats_lock = &dev->queue_lock;
498 if (handle == 0) {
499 handle = qdisc_alloc_handle(dev);
500 err = -ENOMEM;
501 if (handle == 0)
502 goto err_out3;
503 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504 }
505
Thomas Graf3d54b822005-07-05 14:15:09 -0700506 sch->handle = handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507
508 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
Thomas Graf023e09a2005-07-05 14:15:53 -0700509 if (tca[TCA_RATE-1]) {
510 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
511 sch->stats_lock,
512 tca[TCA_RATE-1]);
513 if (err) {
514 /*
515 * Any broken qdiscs that would require
516 * a ops->reset() here? The qdisc was never
517 * in action so it shouldn't be necessary.
518 */
519 if (ops->destroy)
520 ops->destroy(sch);
521 goto err_out3;
522 }
523 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700524 qdisc_lock_tree(dev);
525 list_add_tail(&sch->list, &dev->qdisc_list);
526 qdisc_unlock_tree(dev);
527
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528 return sch;
529 }
530err_out3:
531 dev_put(dev);
Thomas Graf3d54b822005-07-05 14:15:09 -0700532 kfree((char *) sch - sch->padded);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533err_out2:
534 module_put(ops->owner);
535err_out:
536 *errp = err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537 return NULL;
538}
539
540static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
541{
542 if (tca[TCA_OPTIONS-1]) {
543 int err;
544
545 if (sch->ops->change == NULL)
546 return -EINVAL;
547 err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
548 if (err)
549 return err;
550 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551 if (tca[TCA_RATE-1])
552 gen_replace_estimator(&sch->bstats, &sch->rate_est,
553 sch->stats_lock, tca[TCA_RATE-1]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554 return 0;
555}
556
557struct check_loop_arg
558{
559 struct qdisc_walker w;
560 struct Qdisc *p;
561 int depth;
562};
563
564static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
565
566static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
567{
568 struct check_loop_arg arg;
569
570 if (q->ops->cl_ops == NULL)
571 return 0;
572
573 arg.w.stop = arg.w.skip = arg.w.count = 0;
574 arg.w.fn = check_loop_fn;
575 arg.depth = depth;
576 arg.p = p;
577 q->ops->cl_ops->walk(q, &arg.w);
578 return arg.w.stop ? -ELOOP : 0;
579}
580
581static int
582check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
583{
584 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800585 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586 struct check_loop_arg *arg = (struct check_loop_arg *)w;
587
588 leaf = cops->leaf(q, cl);
589 if (leaf) {
590 if (leaf == arg->p || arg->depth > 7)
591 return -ELOOP;
592 return check_loop(leaf, arg->p, arg->depth + 1);
593 }
594 return 0;
595}
596
597/*
598 * Delete/get qdisc.
599 */
600
601static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
602{
Denis V. Lunevb8542722007-12-01 00:21:31 +1100603 struct net *net = skb->sk->sk_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604 struct tcmsg *tcm = NLMSG_DATA(n);
605 struct rtattr **tca = arg;
606 struct net_device *dev;
607 u32 clid = tcm->tcm_parent;
608 struct Qdisc *q = NULL;
609 struct Qdisc *p = NULL;
610 int err;
611
Denis V. Lunevb8542722007-12-01 00:21:31 +1100612 if (net != &init_net)
613 return -EINVAL;
614
Eric W. Biederman881d9662007-09-17 11:56:21 -0700615 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616 return -ENODEV;
617
618 if (clid) {
619 if (clid != TC_H_ROOT) {
620 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
621 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
622 return -ENOENT;
623 q = qdisc_leaf(p, clid);
624 } else { /* ingress */
625 q = dev->qdisc_ingress;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900626 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627 } else {
628 q = dev->qdisc_sleeping;
629 }
630 if (!q)
631 return -ENOENT;
632
633 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
634 return -EINVAL;
635 } else {
636 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
637 return -ENOENT;
638 }
639
640 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
641 return -EINVAL;
642
643 if (n->nlmsg_type == RTM_DELQDISC) {
644 if (!clid)
645 return -EINVAL;
646 if (q->handle == 0)
647 return -ENOENT;
648 if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
649 return err;
650 if (q) {
651 qdisc_notify(skb, n, clid, q, NULL);
Patrick McHardyfd44de72007-04-16 17:07:08 -0700652 qdisc_lock_tree(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700653 qdisc_destroy(q);
Patrick McHardyfd44de72007-04-16 17:07:08 -0700654 qdisc_unlock_tree(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655 }
656 } else {
657 qdisc_notify(skb, n, clid, NULL, q);
658 }
659 return 0;
660}
661
662/*
663 Create/change qdisc.
664 */
665
666static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
667{
Denis V. Lunevb8542722007-12-01 00:21:31 +1100668 struct net *net = skb->sk->sk_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669 struct tcmsg *tcm;
670 struct rtattr **tca;
671 struct net_device *dev;
672 u32 clid;
673 struct Qdisc *q, *p;
674 int err;
675
Denis V. Lunevb8542722007-12-01 00:21:31 +1100676 if (net != &init_net)
677 return -EINVAL;
678
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679replay:
680 /* Reinit, just in case something touches this. */
681 tcm = NLMSG_DATA(n);
682 tca = arg;
683 clid = tcm->tcm_parent;
684 q = p = NULL;
685
Eric W. Biederman881d9662007-09-17 11:56:21 -0700686 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687 return -ENODEV;
688
689 if (clid) {
690 if (clid != TC_H_ROOT) {
691 if (clid != TC_H_INGRESS) {
692 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
693 return -ENOENT;
694 q = qdisc_leaf(p, clid);
695 } else { /*ingress */
696 q = dev->qdisc_ingress;
697 }
698 } else {
699 q = dev->qdisc_sleeping;
700 }
701
702 /* It may be default qdisc, ignore it */
703 if (q && q->handle == 0)
704 q = NULL;
705
706 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
707 if (tcm->tcm_handle) {
708 if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
709 return -EEXIST;
710 if (TC_H_MIN(tcm->tcm_handle))
711 return -EINVAL;
712 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
713 goto create_n_graft;
714 if (n->nlmsg_flags&NLM_F_EXCL)
715 return -EEXIST;
716 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
717 return -EINVAL;
718 if (q == p ||
719 (p && check_loop(q, p, 0)))
720 return -ELOOP;
721 atomic_inc(&q->refcnt);
722 goto graft;
723 } else {
724 if (q == NULL)
725 goto create_n_graft;
726
727 /* This magic test requires explanation.
728 *
729 * We know, that some child q is already
730 * attached to this parent and have choice:
731 * either to change it or to create/graft new one.
732 *
733 * 1. We are allowed to create/graft only
734 * if CREATE and REPLACE flags are set.
735 *
736 * 2. If EXCL is set, requestor wanted to say,
737 * that qdisc tcm_handle is not expected
738 * to exist, so that we choose create/graft too.
739 *
740 * 3. The last case is when no flags are set.
741 * Alas, it is sort of hole in API, we
742 * cannot decide what to do unambiguously.
743 * For now we select create/graft, if
744 * user gave KIND, which does not match existing.
745 */
746 if ((n->nlmsg_flags&NLM_F_CREATE) &&
747 (n->nlmsg_flags&NLM_F_REPLACE) &&
748 ((n->nlmsg_flags&NLM_F_EXCL) ||
749 (tca[TCA_KIND-1] &&
750 rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
751 goto create_n_graft;
752 }
753 }
754 } else {
755 if (!tcm->tcm_handle)
756 return -EINVAL;
757 q = qdisc_lookup(dev, tcm->tcm_handle);
758 }
759
760 /* Change qdisc parameters */
761 if (q == NULL)
762 return -ENOENT;
763 if (n->nlmsg_flags&NLM_F_EXCL)
764 return -EEXIST;
765 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
766 return -EINVAL;
767 err = qdisc_change(q, tca);
768 if (err == 0)
769 qdisc_notify(skb, n, clid, NULL, q);
770 return err;
771
772create_n_graft:
773 if (!(n->nlmsg_flags&NLM_F_CREATE))
774 return -ENOENT;
775 if (clid == TC_H_INGRESS)
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700776 q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_parent,
777 tca, &err);
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900778 else
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700779 q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_handle,
780 tca, &err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781 if (q == NULL) {
782 if (err == -EAGAIN)
783 goto replay;
784 return err;
785 }
786
787graft:
788 if (1) {
789 struct Qdisc *old_q = NULL;
790 err = qdisc_graft(dev, p, clid, q, &old_q);
791 if (err) {
792 if (q) {
Patrick McHardyfd44de72007-04-16 17:07:08 -0700793 qdisc_lock_tree(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700794 qdisc_destroy(q);
Patrick McHardyfd44de72007-04-16 17:07:08 -0700795 qdisc_unlock_tree(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700796 }
797 return err;
798 }
799 qdisc_notify(skb, n, clid, old_q, q);
800 if (old_q) {
Patrick McHardyfd44de72007-04-16 17:07:08 -0700801 qdisc_lock_tree(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802 qdisc_destroy(old_q);
Patrick McHardyfd44de72007-04-16 17:07:08 -0700803 qdisc_unlock_tree(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804 }
805 }
806 return 0;
807}
808
809static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -0700810 u32 pid, u32 seq, u16 flags, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811{
812 struct tcmsg *tcm;
813 struct nlmsghdr *nlh;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700814 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 struct gnet_dump d;
816
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -0700817 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700818 tcm = NLMSG_DATA(nlh);
819 tcm->tcm_family = AF_UNSPEC;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700820 tcm->tcm__pad1 = 0;
821 tcm->tcm__pad2 = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822 tcm->tcm_ifindex = q->dev->ifindex;
823 tcm->tcm_parent = clid;
824 tcm->tcm_handle = q->handle;
825 tcm->tcm_info = atomic_read(&q->refcnt);
826 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
827 if (q->ops->dump && q->ops->dump(q, skb) < 0)
828 goto rtattr_failure;
829 q->qstats.qlen = q->q.qlen;
830
831 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
832 TCA_XSTATS, q->stats_lock, &d) < 0)
833 goto rtattr_failure;
834
835 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
836 goto rtattr_failure;
837
838 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840 gnet_stats_copy_queue(&d, &q->qstats) < 0)
841 goto rtattr_failure;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900842
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 if (gnet_stats_finish_copy(&d) < 0)
844 goto rtattr_failure;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900845
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700846 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847 return skb->len;
848
849nlmsg_failure:
850rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -0700851 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700852 return -1;
853}
854
855static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
856 u32 clid, struct Qdisc *old, struct Qdisc *new)
857{
858 struct sk_buff *skb;
859 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
860
861 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
862 if (!skb)
863 return -ENOBUFS;
864
865 if (old && old->handle) {
866 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
867 goto err_out;
868 }
869 if (new) {
870 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
871 goto err_out;
872 }
873
874 if (skb->len)
Patrick McHardyac6d4392005-08-14 19:29:52 -0700875 return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876
877err_out:
878 kfree_skb(skb);
879 return -EINVAL;
880}
881
882static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
883{
Denis V. Lunevb8542722007-12-01 00:21:31 +1100884 struct net *net = skb->sk->sk_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700885 int idx, q_idx;
886 int s_idx, s_q_idx;
887 struct net_device *dev;
888 struct Qdisc *q;
889
Denis V. Lunevb8542722007-12-01 00:21:31 +1100890 if (net != &init_net)
891 return 0;
892
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893 s_idx = cb->args[0];
894 s_q_idx = q_idx = cb->args[1];
895 read_lock(&dev_base_lock);
Pavel Emelianov7562f872007-05-03 15:13:45 -0700896 idx = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -0700897 for_each_netdev(&init_net, dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898 if (idx < s_idx)
Pavel Emelianov7562f872007-05-03 15:13:45 -0700899 goto cont;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900 if (idx > s_idx)
901 s_q_idx = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902 q_idx = 0;
903 list_for_each_entry(q, &dev->qdisc_list, list) {
904 if (q_idx < s_q_idx) {
905 q_idx++;
906 continue;
907 }
908 if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
Patrick McHardy0463d4a2007-04-16 17:02:10 -0700909 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910 goto done;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911 q_idx++;
912 }
Pavel Emelianov7562f872007-05-03 15:13:45 -0700913cont:
914 idx++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915 }
916
917done:
918 read_unlock(&dev_base_lock);
919
920 cb->args[0] = idx;
921 cb->args[1] = q_idx;
922
923 return skb->len;
924}
925
926
927
928/************************************************
929 * Traffic classes manipulation. *
930 ************************************************/
931
932
933
934static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
935{
Denis V. Lunevb8542722007-12-01 00:21:31 +1100936 struct net *net = skb->sk->sk_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937 struct tcmsg *tcm = NLMSG_DATA(n);
938 struct rtattr **tca = arg;
939 struct net_device *dev;
940 struct Qdisc *q = NULL;
Eric Dumazet20fea082007-11-14 01:44:41 -0800941 const struct Qdisc_class_ops *cops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942 unsigned long cl = 0;
943 unsigned long new_cl;
944 u32 pid = tcm->tcm_parent;
945 u32 clid = tcm->tcm_handle;
946 u32 qid = TC_H_MAJ(clid);
947 int err;
948
Denis V. Lunevb8542722007-12-01 00:21:31 +1100949 if (net != &init_net)
950 return -EINVAL;
951
Eric W. Biederman881d9662007-09-17 11:56:21 -0700952 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 return -ENODEV;
954
955 /*
956 parent == TC_H_UNSPEC - unspecified parent.
957 parent == TC_H_ROOT - class is root, which has no parent.
958 parent == X:0 - parent is root class.
959 parent == X:Y - parent is a node in hierarchy.
960 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
961
962 handle == 0:0 - generate handle from kernel pool.
963 handle == 0:Y - class is X:Y, where X:0 is qdisc.
964 handle == X:Y - clear.
965 handle == X:0 - root class.
966 */
967
968 /* Step 1. Determine qdisc handle X:0 */
969
970 if (pid != TC_H_ROOT) {
971 u32 qid1 = TC_H_MAJ(pid);
972
973 if (qid && qid1) {
974 /* If both majors are known, they must be identical. */
975 if (qid != qid1)
976 return -EINVAL;
977 } else if (qid1) {
978 qid = qid1;
979 } else if (qid == 0)
980 qid = dev->qdisc_sleeping->handle;
981
982 /* Now qid is genuine qdisc handle consistent
983 both with parent and child.
984
985 TC_H_MAJ(pid) still may be unspecified, complete it now.
986 */
987 if (pid)
988 pid = TC_H_MAKE(qid, pid);
989 } else {
990 if (qid == 0)
991 qid = dev->qdisc_sleeping->handle;
992 }
993
994 /* OK. Locate qdisc */
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900995 if ((q = qdisc_lookup(dev, qid)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996 return -ENOENT;
997
998 /* An check that it supports classes */
999 cops = q->ops->cl_ops;
1000 if (cops == NULL)
1001 return -EINVAL;
1002
1003 /* Now try to get class */
1004 if (clid == 0) {
1005 if (pid == TC_H_ROOT)
1006 clid = qid;
1007 } else
1008 clid = TC_H_MAKE(qid, clid);
1009
1010 if (clid)
1011 cl = cops->get(q, clid);
1012
1013 if (cl == 0) {
1014 err = -ENOENT;
1015 if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
1016 goto out;
1017 } else {
1018 switch (n->nlmsg_type) {
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001019 case RTM_NEWTCLASS:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020 err = -EEXIST;
1021 if (n->nlmsg_flags&NLM_F_EXCL)
1022 goto out;
1023 break;
1024 case RTM_DELTCLASS:
1025 err = cops->delete(q, cl);
1026 if (err == 0)
1027 tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
1028 goto out;
1029 case RTM_GETTCLASS:
1030 err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
1031 goto out;
1032 default:
1033 err = -EINVAL;
1034 goto out;
1035 }
1036 }
1037
1038 new_cl = cl;
1039 err = cops->change(q, clid, pid, tca, &new_cl);
1040 if (err == 0)
1041 tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
1042
1043out:
1044 if (cl)
1045 cops->put(q, cl);
1046
1047 return err;
1048}
1049
1050
1051static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1052 unsigned long cl,
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001053 u32 pid, u32 seq, u16 flags, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001054{
1055 struct tcmsg *tcm;
1056 struct nlmsghdr *nlh;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001057 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058 struct gnet_dump d;
Eric Dumazet20fea082007-11-14 01:44:41 -08001059 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001061 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 tcm = NLMSG_DATA(nlh);
1063 tcm->tcm_family = AF_UNSPEC;
1064 tcm->tcm_ifindex = q->dev->ifindex;
1065 tcm->tcm_parent = q->handle;
1066 tcm->tcm_handle = q->handle;
1067 tcm->tcm_info = 0;
1068 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
1069 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1070 goto rtattr_failure;
1071
1072 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
1073 TCA_XSTATS, q->stats_lock, &d) < 0)
1074 goto rtattr_failure;
1075
1076 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1077 goto rtattr_failure;
1078
1079 if (gnet_stats_finish_copy(&d) < 0)
1080 goto rtattr_failure;
1081
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001082 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083 return skb->len;
1084
1085nlmsg_failure:
1086rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001087 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088 return -1;
1089}
1090
1091static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1092 struct Qdisc *q, unsigned long cl, int event)
1093{
1094 struct sk_buff *skb;
1095 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1096
1097 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1098 if (!skb)
1099 return -ENOBUFS;
1100
1101 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1102 kfree_skb(skb);
1103 return -EINVAL;
1104 }
1105
Patrick McHardyac6d4392005-08-14 19:29:52 -07001106 return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107}
1108
1109struct qdisc_dump_args
1110{
1111 struct qdisc_walker w;
1112 struct sk_buff *skb;
1113 struct netlink_callback *cb;
1114};
1115
1116static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1117{
1118 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1119
1120 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1121 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1122}
1123
1124static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1125{
Denis V. Lunevb8542722007-12-01 00:21:31 +11001126 struct net *net = skb->sk->sk_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127 int t;
1128 int s_t;
1129 struct net_device *dev;
1130 struct Qdisc *q;
1131 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
1132 struct qdisc_dump_args arg;
1133
Denis V. Lunevb8542722007-12-01 00:21:31 +11001134 if (net != &init_net)
1135 return 0;
1136
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1138 return 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001139 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140 return 0;
1141
1142 s_t = cb->args[0];
1143 t = 0;
1144
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145 list_for_each_entry(q, &dev->qdisc_list, list) {
1146 if (t < s_t || !q->ops->cl_ops ||
1147 (tcm->tcm_parent &&
1148 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1149 t++;
1150 continue;
1151 }
1152 if (t > s_t)
1153 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1154 arg.w.fn = qdisc_class_dump;
1155 arg.skb = skb;
1156 arg.cb = cb;
1157 arg.w.stop = 0;
1158 arg.w.skip = cb->args[1];
1159 arg.w.count = 0;
1160 q->ops->cl_ops->walk(q, &arg.w);
1161 cb->args[1] = arg.w.count;
1162 if (arg.w.stop)
1163 break;
1164 t++;
1165 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001166
1167 cb->args[0] = t;
1168
1169 dev_put(dev);
1170 return skb->len;
1171}
1172
1173/* Main classifier routine: scans classifier chain attached
1174 to this qdisc, (optionally) tests for protocol and asks
1175 specific classifiers.
1176 */
Patrick McHardy73ca4912007-07-15 00:02:31 -07001177int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1178 struct tcf_result *res)
1179{
1180 __be16 protocol = skb->protocol;
1181 int err = 0;
1182
1183 for (; tp; tp = tp->next) {
1184 if ((tp->protocol == protocol ||
1185 tp->protocol == htons(ETH_P_ALL)) &&
1186 (err = tp->classify(skb, tp, res)) >= 0) {
1187#ifdef CONFIG_NET_CLS_ACT
1188 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1189 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1190#endif
1191 return err;
1192 }
1193 }
1194 return -1;
1195}
1196EXPORT_SYMBOL(tc_classify_compat);
1197
/*
 * Classify @skb against the chain starting at @tp, storing the result in
 * @res.
 *
 * With CONFIG_NET_CLS_ACT, a TC_ACT_RECLASSIFY verdict restarts the
 * classification from the head of the chain.  The number of restarts is
 * tracked in skb->tc_verd; once it reaches MAX_REC_LOOP the packet is
 * given the TC_ACT_SHOT verdict and a (rate-limited) message is logged.
 *
 * Returns the verdict of tc_classify_compat(), or TC_ACT_SHOT when a
 * reclassify loop is detected.
 */
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;
reclassify:
#endif

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;

		if (verd++ >= MAX_REC_LOOP) {
			/*
			 * This runs once per offending packet, so the
			 * message must be rate limited or a looping rule
			 * can flood the kernel log.
			 */
			if (net_ratelimit())
				printk("rule prio %u protocol %02x reclassify loop, "
				       "packet dropped\n",
				       tp->prio&0xffff, ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228
Patrick McHardya48b5a62007-03-23 11:29:43 -07001229void tcf_destroy(struct tcf_proto *tp)
1230{
1231 tp->ops->destroy(tp);
1232 module_put(tp->ops->owner);
1233 kfree(tp);
1234}
1235
1236void tcf_destroy_chain(struct tcf_proto *fl)
1237{
1238 struct tcf_proto *tp;
1239
1240 while ((tp = fl) != NULL) {
1241 fl = tp->next;
1242 tcf_destroy(tp);
1243 }
1244}
1245EXPORT_SYMBOL(tcf_destroy_chain);
1246
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247#ifdef CONFIG_PROC_FS
1248static int psched_show(struct seq_file *seq, void *v)
1249{
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001250 struct timespec ts;
1251
1252 hrtimer_get_res(CLOCK_MONOTONIC, &ts);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253 seq_printf(seq, "%08x %08x %08x %08x\n",
Patrick McHardy641b9e02007-03-16 01:18:42 -07001254 (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
Patrick McHardy514bca32007-03-16 12:34:52 -07001255 1000000,
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001256 (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257
1258 return 0;
1259}
1260
1261static int psched_open(struct inode *inode, struct file *file)
1262{
1263 return single_open(file, psched_show, PDE(inode)->data);
1264}
1265
Arjan van de Venda7071d2007-02-12 00:55:36 -08001266static const struct file_operations psched_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267 .owner = THIS_MODULE,
1268 .open = psched_open,
1269 .read = seq_read,
1270 .llseek = seq_lseek,
1271 .release = single_release,
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001272};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273#endif
1274
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275static int __init pktsched_init(void)
1276{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277 register_qdisc(&pfifo_qdisc_ops);
1278 register_qdisc(&bfifo_qdisc_ops);
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02001279 proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280
Thomas Grafbe577dd2007-03-22 11:55:50 -07001281 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
1282 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
1283 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
1284 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
1285 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
1286 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
1287
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288 return 0;
1289}
1290
1291subsys_initcall(pktsched_init);
1292
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293EXPORT_SYMBOL(qdisc_get_rtab);
1294EXPORT_SYMBOL(qdisc_put_rtab);
1295EXPORT_SYMBOL(register_qdisc);
1296EXPORT_SYMBOL(unregister_qdisc);