blob: bf03e7fa184953b6112554f829bb691ceb9643df [file] [log] [blame]
/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */
10
11#include <linux/module.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070012#include <linux/types.h>
13#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070015#include <linux/errno.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020016#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070017#include <linux/netdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070019#include <linux/skbuff.h>
20#include <linux/moduleparam.h>
Patrick McHardy0ba48052007-07-02 22:49:07 -070021#include <net/dst.h>
22#include <net/neighbour.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <net/pkt_sched.h>
24
25/*
   How to set it up.
   -----------------
28
29 After loading this module you will find a new device teqlN
30 and new qdisc with the same name. To join a slave to the equalizer
31 you should just set this qdisc on a device f.e.
32
33 # tc qdisc add dev eth0 root teql0
34 # tc qdisc add dev eth1 root teql0
35
36 That's all. Full PnP 8)
37
38 Applicability.
39 --------------
40
41 1. Slave devices MUST be active devices, i.e., they must raise the tbusy
42 signal and generate EOI events. If you want to equalize virtual devices
43 like tunnels, use a normal eql device.
44 2. This device puts no limitations on physical slave characteristics
45 f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
46 Certainly, large difference in link speeds will make the resulting
   equalized link unusable, because of huge packet reordering.
   I estimate an upper useful difference as ~10 times.
49 3. If the slave requires address resolution, only protocols using
50 neighbour cache (IPv4/IPv6) will work over the equalized link.
51 Other protocols are still allowed to use the slave device directly,
52 which will not break load balancing, though native slave
53 traffic will have the highest priority. */
54
/* One equalizer instance: the virtual teqlN net_device plus the circular
 * list of slave qdiscs attached to it. */
struct teql_master
{
	struct Qdisc_ops qops;		/* per-master qdisc ops; id is set to dev->name in teql_init() */
	struct net_device *dev;		/* the virtual teqlN master device */
	struct Qdisc *slaves;		/* circular list of slave qdiscs, linked via NEXT_SLAVE() */
	struct list_head master_list;	/* node in the module-global master_dev_list */
	struct net_device_stats stats;	/* tx statistics reported for the master device */
};
63
/* Private data of each slave teql qdisc. */
struct teql_sched_data
{
	struct Qdisc *next;		/* next slave in the master's circular list */
	struct teql_master *m;		/* owning equalizer master */
	struct neighbour *ncache;	/* last resolved neighbour, cached for header building */
	struct sk_buff_head q;		/* local queue of packets waiting for transmission */
};
71
/* Follow the circular slave list through a slave qdisc's private data. */
#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)

/* Flag bits the master mirrors from (the intersection of) its slaves. */
#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
76/* "teql*" qdisc routines */
77
78static int
79teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
80{
David S. Miller5ce2d482008-07-08 17:06:30 -070081 struct net_device *dev = qdisc_dev(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -070082 struct teql_sched_data *q = qdisc_priv(sch);
83
Krishna Kumar4cd8c9e2007-05-08 18:57:50 -070084 if (q->q.qlen < dev->tx_queue_len) {
85 __skb_queue_tail(&q->q, skb);
Jussi Kivilinna0abf77e2008-07-20 00:08:27 -070086 sch->bstats.bytes += qdisc_pkt_len(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -070087 sch->bstats.packets++;
88 return 0;
89 }
90
Linus Torvalds1da177e2005-04-16 15:20:36 -070091 kfree_skb(skb);
92 sch->qstats.drops++;
93 return NET_XMIT_DROP;
94}
95
/* Put @skb back at the head of the local queue after a failed transmit
 * attempt; counted as a requeue.  Never fails. */
static int
teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct teql_sched_data *q = qdisc_priv(sch);

	__skb_queue_head(&q->q, skb);
	sch->qstats.requeues++;
	return 0;
}
105
/* Dequeue from this slave's local queue.
 *
 * When the local queue is empty, this slave becomes the master's
 * preferred starting point and the master device is woken so it can
 * feed more packets.  sch->q.qlen is kept as the sum of the local
 * queue and the master tx queue's qdisc backlog so upper layers see a
 * combined length. */
static struct sk_buff *
teql_dequeue(struct Qdisc* sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	if (skb == NULL) {
		struct net_device *m = qdisc_dev(dat_queue->qdisc);
		if (m) {
			/* empty: rotate the round-robin to this slave and
			 * wake the master device */
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	}
	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
	return skb;
}
125
/* Peeking is not supported: teql is meant to be used as a root qdisc,
 * which is never peeked at by an outer scheduler. */
static struct sk_buff *
teql_peek(struct Qdisc* sch)
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}
132
/* NULL-tolerant wrapper around neigh_release(); used with xchg() to
 * drop a cached neighbour that may not be set. */
static __inline__ void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}
139
/* Flush all locally queued packets and drop the cached neighbour. */
static void
teql_reset(struct Qdisc* sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
	/* atomically take ownership of the cache pointer before releasing */
	teql_neigh_release(xchg(&dat->ncache, NULL));
}
149
/* Remove @sch from its master's circular slave list and free its
 * queued packets.  If it was the last slave, the master's slave list
 * is emptied and the master tx queue's qdisc is reset under its root
 * lock. */
static void
teql_destroy(struct Qdisc* sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if ((prev = master->slaves) != NULL) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				/* unlink from the circular list */
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					/* still pointing at itself -> it was the only slave */
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(txq->qdisc);
						spin_lock_bh(root_lock);
						qdisc_reset(txq->qdisc);
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}
185
/* Attach a new slave device to the equalizer whose qdisc ops this is.
 *
 * While the master is UP, a new slave must be at least as capable as
 * the master (flags, mtu); while it is down, the master's flags/mtu
 * are instead narrowed to the intersection of all slaves.
 *
 * Returns 0 on success, -EINVAL on capability mismatch, -ELOOP when
 * trying to enslave the master to itself.  @opt is ignored.
 */
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master*)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	/* slave must leave room for the master's link-layer header */
	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			/* master is live: the new slave may not reduce
			 * the master's advertised capabilities */
			if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
			    || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
			    || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
			    || dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			/* master is down: shrink its capabilities to
			 * what every slave supports */
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		/* insert after the current head of the circular list */
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		/* first slave: a one-element circular list; master
		 * inherits its mtu and FMASK flags */
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}
229
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230
/* Resolve the neighbour for @skb on slave @dev and build the
 * link-layer header.
 *
 * Reuses the slave's cached neighbour when it matches the skb's
 * destination; otherwise looks one up.  On success the (referenced)
 * neighbour is swapped into the cache and the old one released.
 *
 * Returns 0 when the header was built, 1 when resolution is pending
 * and @skb_res was queued as the resolution trigger, -EAGAIN when
 * pending and no skb_res was offered, negative errno on failure.
 */
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
	struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
	struct neighbour *mn = skb->dst->neighbour;
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		/* cache hit: take our own reference */
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;

		/* n->ha may change under us; copy it under the lock */
		read_lock(&n->lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      n->ha, NULL, skb->len);
		read_unlock(&n->lock);

		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		/* transfer our reference into the cache, dropping the old one */
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}
267
/* Fast-path wrapper: returns -ENODEV if the slave has been detached
 * (noop qdisc), 0 when no link-layer resolution is needed, otherwise
 * defers to __teql_resolve(). */
static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
	if (txq->qdisc == &noop_qdisc)
		return -ENODEV;

	/* no header ops or no neighbour -> nothing to resolve */
	if (dev->header_ops == NULL ||
	    skb->dst == NULL ||
	    skb->dst->neighbour == NULL)
		return 0;
	return __teql_resolve(skb, skb_res, dev);
}
281
/* hard_start_xmit for the master device: round-robin over the slaves,
 * transmitting @skb on the first slave that is running, resolvable and
 * whose tx lock can be taken without spinning.
 *
 * Two passes are made: the first with skb_res == NULL (resolution
 * failures only noted), and if any slave needed resolution, a second
 * pass offering the skb itself as the resolution trigger.
 *
 * Returns 0 when the skb was consumed (sent, queued for resolution, or
 * dropped); returns 1 with the master queue stopped when every usable
 * slave was busy, so the stack will retry later. */
static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	if ((q = start) == NULL)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq;

		slave_txq = netdev_get_tx_queue(slave, 0);
		/* skip slaves whose teql qdisc is no longer active */
		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (__netif_subqueue_stopped(slave, subq) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave)) {
		case 0:
			/* header built; try to transmit without spinning
			 * on the slave's tx lock */
			if (__netif_tx_trylock(slave_txq)) {
				if (!netif_tx_queue_stopped(slave_txq) &&
				    !netif_tx_queue_frozen(slave_txq) &&
				    slave->hard_start_xmit(skb, slave) == 0) {
					__netif_tx_unlock(slave_txq);
					/* advance round-robin past this slave */
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->stats.tx_packets++;
					master->stats.tx_bytes +=
						qdisc_pkt_len(skb);
					return 0;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_queue_stopped(dev))
				busy = 1;
			break;
		case 1:
			/* skb consumed as a resolution trigger */
			master->slaves = NEXT_SLAVE(q);
			return 0;
		default:
			/* resolution failed on this slave; remember to retry */
			nores = 1;
			break;
		}
		/* strip any header a failed attempt may have pushed */
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		/* second pass: offer the skb itself for resolution */
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return 1;
	}
	master->stats.tx_errors++;

drop:
	master->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return 0;
}
358
359static int teql_master_open(struct net_device *dev)
360{
361 struct Qdisc * q;
Patrick McHardy2941a482006-01-08 22:05:26 -0800362 struct teql_master *m = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363 int mtu = 0xFFFE;
364 unsigned flags = IFF_NOARP|IFF_MULTICAST;
365
366 if (m->slaves == NULL)
367 return -EUNATCH;
368
369 flags = FMASK;
370
371 q = m->slaves;
372 do {
David S. Miller5ce2d482008-07-08 17:06:30 -0700373 struct net_device *slave = qdisc_dev(q);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374
375 if (slave == NULL)
376 return -EUNATCH;
377
378 if (slave->mtu < mtu)
379 mtu = slave->mtu;
380 if (slave->hard_header_len > LL_MAX_HEADER)
381 return -EINVAL;
382
383 /* If all the slaves are BROADCAST, master is BROADCAST
384 If all the slaves are PtP, master is PtP
385 Otherwise, master is NBMA.
386 */
387 if (!(slave->flags&IFF_POINTOPOINT))
388 flags &= ~IFF_POINTOPOINT;
389 if (!(slave->flags&IFF_BROADCAST))
390 flags &= ~IFF_BROADCAST;
391 if (!(slave->flags&IFF_MULTICAST))
392 flags &= ~IFF_MULTICAST;
393 } while ((q = NEXT_SLAVE(q)) != m->slaves);
394
395 m->dev->mtu = mtu;
396 m->dev->flags = (m->dev->flags&~FMASK) | flags;
397 netif_start_queue(m->dev);
398 return 0;
399}
400
/* Bring the master device down: just stop its tx queue. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}
406
/* get_stats hook: expose the stats accumulated in teql_master_xmit(). */
static struct net_device_stats *teql_master_stats(struct net_device *dev)
{
	struct teql_master *m = netdev_priv(dev);
	return &m->stats;
}
412
413static int teql_master_mtu(struct net_device *dev, int new_mtu)
414{
Patrick McHardy2941a482006-01-08 22:05:26 -0800415 struct teql_master *m = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416 struct Qdisc *q;
417
418 if (new_mtu < 68)
419 return -EINVAL;
420
421 q = m->slaves;
422 if (q) {
423 do {
David S. Miller5ce2d482008-07-08 17:06:30 -0700424 if (new_mtu > qdisc_dev(q)->mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425 return -EINVAL;
426 } while ((q=NEXT_SLAVE(q)) != m->slaves);
427 }
428
429 dev->mtu = new_mtu;
430 return 0;
431}
432
/* alloc_netdev() setup callback: wire this master's private Qdisc_ops
 * to the teql_* qdisc routines and initialize the net_device hooks and
 * link parameters of the virtual teqlN device. */
static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev = dev;
	ops->priv_size = sizeof(struct teql_sched_data);

	ops->enqueue = teql_enqueue;
	ops->dequeue = teql_dequeue;
	ops->peek = teql_peek;
	ops->requeue = teql_requeue;
	ops->init = teql_qdisc_init;
	ops->reset = teql_reset;
	ops->destroy = teql_destroy;
	ops->owner = THIS_MODULE;

	dev->open = teql_master_open;
	dev->hard_start_xmit = teql_master_xmit;
	dev->stop = teql_master_close;
	dev->get_stats = teql_master_stats;
	dev->change_mtu = teql_master_mtu;
	dev->type = ARPHRD_VOID;	/* no intrinsic link-layer type */
	dev->mtu = 1500;
	dev->tx_queue_len = 100;
	dev->flags = IFF_NOARP;
	/* leave room for any slave's link-layer header */
	dev->hard_header_len = LL_MAX_HEADER;
}
461
462static LIST_HEAD(master_dev_list);
463static int max_equalizers = 1;
464module_param(max_equalizers, int, 0);
465MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
466
/* Module init: create max_equalizers teqlN master devices, each with
 * its own qdisc ops registered under the device's name.
 *
 * On a mid-loop failure the partially created device is unwound; the
 * module still loads successfully if at least one master was set up
 * (i > 0), otherwise the last error is returned. */
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				  "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		if ((err = register_netdev(dev))) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		/* the qdisc is named after the device (teql0, teql1, ...) */
		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}
503
/* Module exit: tear down every master created in teql_init(), in the
 * reverse order of its registration steps (qdisc, netdev, memory). */
static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}
517
518module_init(teql_init);
519module_exit(teql_exit);
520
521MODULE_LICENSE("GPL");