blob: 7a7ee1cc3b5a12d8fd45a20c4e30b2d202828672 [file] [log] [blame]
/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080032#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080049#include <linux/if_ether.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090050#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020051#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <net/ip.h>
53#include <net/protocol.h>
54#include <linux/skbuff.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020055#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070056#include <net/sock.h>
57#include <net/icmp.h>
58#include <net/udp.h>
59#include <net/raw.h>
60#include <linux/notifier.h>
61#include <linux/if_arp.h>
62#include <linux/netfilter_ipv4.h>
63#include <net/ipip.h>
64#include <net/checksum.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070065#include <net/netlink.h>
Patrick McHardyf0ad0862010-04-13 05:03:23 +000066#include <net/fib_rules.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070067
68#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
69#define CONFIG_IP_PIMSM 1
70#endif
71
Patrick McHardy0c122952010-04-13 05:03:22 +000072struct mr_table {
Patrick McHardyf0ad0862010-04-13 05:03:23 +000073 struct list_head list;
Patrick McHardy8de53df2010-04-15 13:29:28 +020074#ifdef CONFIG_NET_NS
75 struct net *net;
76#endif
Patrick McHardyf0ad0862010-04-13 05:03:23 +000077 u32 id;
Patrick McHardy0c122952010-04-13 05:03:22 +000078 struct sock *mroute_sk;
79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES];
82 struct vif_device vif_table[MAXVIFS];
83 int maxvif;
84 atomic_t cache_resolve_queue_len;
85 int mroute_do_assert;
86 int mroute_do_pim;
87#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
88 int mroute_reg_vif_num;
89#endif
90};
91
Patrick McHardyf0ad0862010-04-13 05:03:23 +000092struct ipmr_rule {
93 struct fib_rule common;
94};
95
/* Result of a rule lookup: the table chosen by ipmr_rule_action(). */
struct ipmr_result {
	struct mr_table		*mrt;
};
99
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100/* Big lock, protecting vif table, mrt cache and mroute socket state.
101 Note that the changes are semaphored via rtnl_lock.
102 */
103
104static DEFINE_RWLOCK(mrt_lock);
105
106/*
107 * Multicast router control variables
108 */
109
Patrick McHardy0c122952010-04-13 05:03:22 +0000110#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
112/* Special spinlock for queue of unresolved entries */
113static DEFINE_SPINLOCK(mfc_unres_lock);
114
115/* We return to original Alan's scheme. Hash table of resolved
116 entries is changed only in process context and protected
117 with weak lock mrt_lock. Queue of unresolved entries is protected
118 with strong spinlock mfc_unres_lock.
119
120 In this case data path is free of exclusive locks at all.
121 */
122
Christoph Lametere18b8902006-12-06 20:33:20 -0800123static struct kmem_cache *mrt_cachep __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000125static struct mr_table *ipmr_new_table(struct net *net, u32 id);
Patrick McHardy0c122952010-04-13 05:03:22 +0000126static int ip_mr_forward(struct net *net, struct mr_table *mrt,
127 struct sk_buff *skb, struct mfc_cache *cache,
128 int local);
129static int ipmr_cache_report(struct mr_table *mrt,
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000130 struct sk_buff *pkt, vifi_t vifi, int assert);
Patrick McHardycb6a4e42010-04-26 16:02:08 +0200131static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
132 struct mfc_cache *c, struct rtmsg *rtm);
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000133static void ipmr_expire_process(unsigned long arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000135#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
136#define ipmr_for_each_table(mrt, net) \
137 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
138
139static struct mr_table *ipmr_get_table(struct net *net, u32 id)
140{
141 struct mr_table *mrt;
142
143 ipmr_for_each_table(mrt, net) {
144 if (mrt->id == id)
145 return mrt;
146 }
147 return NULL;
148}
149
150static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
151 struct mr_table **mrt)
152{
153 struct ipmr_result res;
154 struct fib_lookup_arg arg = { .result = &res, };
155 int err;
156
157 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
158 if (err < 0)
159 return err;
160 *mrt = res.mrt;
161 return 0;
162}
163
164static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
165 int flags, struct fib_lookup_arg *arg)
166{
167 struct ipmr_result *res = arg->result;
168 struct mr_table *mrt;
169
170 switch (rule->action) {
171 case FR_ACT_TO_TBL:
172 break;
173 case FR_ACT_UNREACHABLE:
174 return -ENETUNREACH;
175 case FR_ACT_PROHIBIT:
176 return -EACCES;
177 case FR_ACT_BLACKHOLE:
178 default:
179 return -EINVAL;
180 }
181
182 mrt = ipmr_get_table(rule->fr_net, rule->table);
183 if (mrt == NULL)
184 return -EAGAIN;
185 res->mrt = mrt;
186 return 0;
187}
188
/* Rule match callback: unconditionally reports a match (returns 1). */
static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	const int matched = 1;

	return matched;
}
193
194static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
195 FRA_GENERIC_POLICY,
196};
197
/* Rule configure callback: does nothing and always returns 0. */
static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	const int rc = 0;

	return rc;
}
203
/* Rule compare callback: always returns 1, whatever @frh and @tb hold. */
static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	const int same = 1;

	return same;
}
209
210static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
211 struct fib_rule_hdr *frh)
212{
213 frh->dst_len = 0;
214 frh->src_len = 0;
215 frh->tos = 0;
216 return 0;
217}
218
Patrick McHardy3d0c9c42010-04-26 16:02:04 +0200219static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
Patrick McHardy25239ce2010-04-26 16:02:05 +0200220 .family = RTNL_FAMILY_IPMR,
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000221 .rule_size = sizeof(struct ipmr_rule),
222 .addr_size = sizeof(u32),
223 .action = ipmr_rule_action,
224 .match = ipmr_rule_match,
225 .configure = ipmr_rule_configure,
226 .compare = ipmr_rule_compare,
227 .default_pref = fib_default_rule_pref,
228 .fill = ipmr_rule_fill,
229 .nlgroup = RTNLGRP_IPV4_RULE,
230 .policy = ipmr_rule_policy,
231 .owner = THIS_MODULE,
232};
233
234static int __net_init ipmr_rules_init(struct net *net)
235{
236 struct fib_rules_ops *ops;
237 struct mr_table *mrt;
238 int err;
239
240 ops = fib_rules_register(&ipmr_rules_ops_template, net);
241 if (IS_ERR(ops))
242 return PTR_ERR(ops);
243
244 INIT_LIST_HEAD(&net->ipv4.mr_tables);
245
246 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
247 if (mrt == NULL) {
248 err = -ENOMEM;
249 goto err1;
250 }
251
252 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
253 if (err < 0)
254 goto err2;
255
256 net->ipv4.mr_rules_ops = ops;
257 return 0;
258
259err2:
260 kfree(mrt);
261err1:
262 fib_rules_unregister(ops);
263 return err;
264}
265
266static void __net_exit ipmr_rules_exit(struct net *net)
267{
268 struct mr_table *mrt, *next;
269
270 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
271 kfree(mrt);
272 fib_rules_unregister(net->ipv4.mr_rules_ops);
273}
274#else
275#define ipmr_for_each_table(mrt, net) \
276 for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
277
278static struct mr_table *ipmr_get_table(struct net *net, u32 id)
279{
280 return net->ipv4.mrt;
281}
282
283static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
284 struct mr_table **mrt)
285{
286 *mrt = net->ipv4.mrt;
287 return 0;
288}
289
290static int __net_init ipmr_rules_init(struct net *net)
291{
292 net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
293 return net->ipv4.mrt ? 0 : -ENOMEM;
294}
295
296static void __net_exit ipmr_rules_exit(struct net *net)
297{
298 kfree(net->ipv4.mrt);
299}
300#endif
301
302static struct mr_table *ipmr_new_table(struct net *net, u32 id)
303{
304 struct mr_table *mrt;
305 unsigned int i;
306
307 mrt = ipmr_get_table(net, id);
308 if (mrt != NULL)
309 return mrt;
310
311 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
312 if (mrt == NULL)
313 return NULL;
Patrick McHardy8de53df2010-04-15 13:29:28 +0200314 write_pnet(&mrt->net, net);
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000315 mrt->id = id;
316
317 /* Forwarding cache */
318 for (i = 0; i < MFC_LINES; i++)
319 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
320
321 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
322
323 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
324 (unsigned long)mrt);
325
326#ifdef CONFIG_IP_PIMSM
327 mrt->mroute_reg_vif_num = -1;
328#endif
329#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
330 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
331#endif
332 return mrt;
333}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334
335/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
336
Wang Chend6070322008-07-14 20:55:26 -0700337static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
338{
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000339 struct net *net = dev_net(dev);
340
Wang Chend6070322008-07-14 20:55:26 -0700341 dev_close(dev);
342
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000343 dev = __dev_get_by_name(net, "tunl0");
Wang Chend6070322008-07-14 20:55:26 -0700344 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800345 const struct net_device_ops *ops = dev->netdev_ops;
Wang Chend6070322008-07-14 20:55:26 -0700346 struct ifreq ifr;
Wang Chend6070322008-07-14 20:55:26 -0700347 struct ip_tunnel_parm p;
348
349 memset(&p, 0, sizeof(p));
350 p.iph.daddr = v->vifc_rmt_addr.s_addr;
351 p.iph.saddr = v->vifc_lcl_addr.s_addr;
352 p.iph.version = 4;
353 p.iph.ihl = 5;
354 p.iph.protocol = IPPROTO_IPIP;
355 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
356 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
357
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800358 if (ops->ndo_do_ioctl) {
359 mm_segment_t oldfs = get_fs();
360
361 set_fs(KERNEL_DS);
362 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
363 set_fs(oldfs);
364 }
Wang Chend6070322008-07-14 20:55:26 -0700365 }
366}
367
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368static
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000369struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370{
371 struct net_device *dev;
372
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000373 dev = __dev_get_by_name(net, "tunl0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374
375 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800376 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 int err;
378 struct ifreq ifr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 struct ip_tunnel_parm p;
380 struct in_device *in_dev;
381
382 memset(&p, 0, sizeof(p));
383 p.iph.daddr = v->vifc_rmt_addr.s_addr;
384 p.iph.saddr = v->vifc_lcl_addr.s_addr;
385 p.iph.version = 4;
386 p.iph.ihl = 5;
387 p.iph.protocol = IPPROTO_IPIP;
388 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
Stephen Hemmingerba93ef72008-01-21 17:28:59 -0800389 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800391 if (ops->ndo_do_ioctl) {
392 mm_segment_t oldfs = get_fs();
393
394 set_fs(KERNEL_DS);
395 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
396 set_fs(oldfs);
397 } else
398 err = -EOPNOTSUPP;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399
400 dev = NULL;
401
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000402 if (err == 0 &&
403 (dev = __dev_get_by_name(net, p.name)) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 dev->flags |= IFF_MULTICAST;
405
Herbert Xue5ed6392005-10-03 14:35:55 -0700406 in_dev = __in_dev_get_rtnl(dev);
Herbert Xu71e27da2007-06-04 23:36:06 -0700407 if (in_dev == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700409
410 ipv4_devconf_setall(in_dev);
411 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412
413 if (dev_open(dev))
414 goto failure;
Wang Chen7dc00c82008-07-14 20:56:34 -0700415 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416 }
417 }
418 return dev;
419
420failure:
421 /* allow the register to be completed before unregistering. */
422 rtnl_unlock();
423 rtnl_lock();
424
425 unregister_netdevice(dev);
426 return NULL;
427}
428
429#ifdef CONFIG_IP_PIMSM
430
Stephen Hemminger6fef4c02009-08-31 19:50:41 +0000431static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432{
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000433 struct net *net = dev_net(dev);
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000434 struct mr_table *mrt;
435 struct flowi fl = {
436 .oif = dev->ifindex,
437 .iif = skb->skb_iif,
438 .mark = skb->mark,
439 };
440 int err;
441
442 err = ipmr_fib_lookup(net, &fl, &mrt);
443 if (err < 0)
444 return err;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000445
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446 read_lock(&mrt_lock);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700447 dev->stats.tx_bytes += skb->len;
448 dev->stats.tx_packets++;
Patrick McHardy0c122952010-04-13 05:03:22 +0000449 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 read_unlock(&mrt_lock);
451 kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000452 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453}
454
Stephen Hemminger007c3832008-11-20 20:28:35 -0800455static const struct net_device_ops reg_vif_netdev_ops = {
456 .ndo_start_xmit = reg_vif_xmit,
457};
458
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459static void reg_vif_setup(struct net_device *dev)
460{
461 dev->type = ARPHRD_PIMREG;
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800462 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 dev->flags = IFF_NOARP;
Stephen Hemminger007c3832008-11-20 20:28:35 -0800464 dev->netdev_ops = &reg_vif_netdev_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 dev->destructor = free_netdev;
Tom Goff403dbb92009-06-14 03:16:13 -0700466 dev->features |= NETIF_F_NETNS_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467}
468
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000469static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470{
471 struct net_device *dev;
472 struct in_device *in_dev;
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000473 char name[IFNAMSIZ];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700474
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000475 if (mrt->id == RT_TABLE_DEFAULT)
476 sprintf(name, "pimreg");
477 else
478 sprintf(name, "pimreg%u", mrt->id);
479
480 dev = alloc_netdev(0, name, reg_vif_setup);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481
482 if (dev == NULL)
483 return NULL;
484
Tom Goff403dbb92009-06-14 03:16:13 -0700485 dev_net_set(dev, net);
486
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 if (register_netdevice(dev)) {
488 free_netdev(dev);
489 return NULL;
490 }
491 dev->iflink = 0;
492
Herbert Xu71e27da2007-06-04 23:36:06 -0700493 rcu_read_lock();
494 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
495 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700497 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498
Herbert Xu71e27da2007-06-04 23:36:06 -0700499 ipv4_devconf_setall(in_dev);
500 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
501 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502
503 if (dev_open(dev))
504 goto failure;
505
Wang Chen7dc00c82008-07-14 20:56:34 -0700506 dev_hold(dev);
507
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508 return dev;
509
510failure:
511 /* allow the register to be completed before unregistering. */
512 rtnl_unlock();
513 rtnl_lock();
514
515 unregister_netdevice(dev);
516 return NULL;
517}
518#endif
519
520/*
521 * Delete a VIF entry
Wang Chen7dc00c82008-07-14 20:56:34 -0700522 * @notify: Set to 1, if the caller is a notifier_call
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900524
Patrick McHardy0c122952010-04-13 05:03:22 +0000525static int vif_delete(struct mr_table *mrt, int vifi, int notify,
Eric Dumazetd17fa6f2009-10-28 05:21:38 +0000526 struct list_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527{
528 struct vif_device *v;
529 struct net_device *dev;
530 struct in_device *in_dev;
531
Patrick McHardy0c122952010-04-13 05:03:22 +0000532 if (vifi < 0 || vifi >= mrt->maxvif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533 return -EADDRNOTAVAIL;
534
Patrick McHardy0c122952010-04-13 05:03:22 +0000535 v = &mrt->vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536
537 write_lock_bh(&mrt_lock);
538 dev = v->dev;
539 v->dev = NULL;
540
541 if (!dev) {
542 write_unlock_bh(&mrt_lock);
543 return -EADDRNOTAVAIL;
544 }
545
546#ifdef CONFIG_IP_PIMSM
Patrick McHardy0c122952010-04-13 05:03:22 +0000547 if (vifi == mrt->mroute_reg_vif_num)
548 mrt->mroute_reg_vif_num = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549#endif
550
Patrick McHardy0c122952010-04-13 05:03:22 +0000551 if (vifi+1 == mrt->maxvif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 int tmp;
553 for (tmp=vifi-1; tmp>=0; tmp--) {
Patrick McHardy0c122952010-04-13 05:03:22 +0000554 if (VIF_EXISTS(mrt, tmp))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 break;
556 }
Patrick McHardy0c122952010-04-13 05:03:22 +0000557 mrt->maxvif = tmp+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 }
559
560 write_unlock_bh(&mrt_lock);
561
562 dev_set_allmulti(dev, -1);
563
Herbert Xue5ed6392005-10-03 14:35:55 -0700564 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
Herbert Xu42f811b2007-06-04 23:34:44 -0700565 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 ip_rt_multicast_event(in_dev);
567 }
568
Wang Chen7dc00c82008-07-14 20:56:34 -0700569 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
Eric Dumazetd17fa6f2009-10-28 05:21:38 +0000570 unregister_netdevice_queue(dev, head);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571
572 dev_put(dev);
573 return 0;
574}
575
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000576static inline void ipmr_cache_free(struct mfc_cache *c)
577{
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000578 kmem_cache_free(mrt_cachep, c);
579}
580
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581/* Destroy an unresolved cache entry, killing queued skbs
582 and reporting error to netlink readers.
583 */
584
Patrick McHardy0c122952010-04-13 05:03:22 +0000585static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586{
Patrick McHardy8de53df2010-04-15 13:29:28 +0200587 struct net *net = read_pnet(&mrt->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700589 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590
Patrick McHardy0c122952010-04-13 05:03:22 +0000591 atomic_dec(&mrt->cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592
Jianjun Kongc354e122008-11-03 00:28:02 -0800593 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700594 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
596 nlh->nlmsg_type = NLMSG_ERROR;
597 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
598 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700599 e = NLMSG_DATA(nlh);
600 e->error = -ETIMEDOUT;
601 memset(&e->msg, 0, sizeof(e->msg));
Thomas Graf2942e902006-08-15 00:30:25 -0700602
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000603 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604 } else
605 kfree_skb(skb);
606 }
607
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000608 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609}
610
611
Patrick McHardye258beb2010-04-13 05:03:19 +0000612/* Timer process for the unresolved queue. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613
Patrick McHardye258beb2010-04-13 05:03:19 +0000614static void ipmr_expire_process(unsigned long arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615{
Patrick McHardy0c122952010-04-13 05:03:22 +0000616 struct mr_table *mrt = (struct mr_table *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617 unsigned long now;
618 unsigned long expires;
Patrick McHardy862465f2010-04-13 05:03:21 +0000619 struct mfc_cache *c, *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620
621 if (!spin_trylock(&mfc_unres_lock)) {
Patrick McHardy0c122952010-04-13 05:03:22 +0000622 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 return;
624 }
625
Patrick McHardy0c122952010-04-13 05:03:22 +0000626 if (list_empty(&mrt->mfc_unres_queue))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627 goto out;
628
629 now = jiffies;
630 expires = 10*HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631
Patrick McHardy0c122952010-04-13 05:03:22 +0000632 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 if (time_after(c->mfc_un.unres.expires, now)) {
634 unsigned long interval = c->mfc_un.unres.expires - now;
635 if (interval < expires)
636 expires = interval;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 continue;
638 }
639
Patrick McHardy862465f2010-04-13 05:03:21 +0000640 list_del(&c->list);
Patrick McHardy0c122952010-04-13 05:03:22 +0000641 ipmr_destroy_unres(mrt, c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642 }
643
Patrick McHardy0c122952010-04-13 05:03:22 +0000644 if (!list_empty(&mrt->mfc_unres_queue))
645 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646
647out:
648 spin_unlock(&mfc_unres_lock);
649}
650
651/* Fill oifs list. It is called under write locked mrt_lock. */
652
Patrick McHardy0c122952010-04-13 05:03:22 +0000653static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000654 unsigned char *ttls)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655{
656 int vifi;
657
658 cache->mfc_un.res.minvif = MAXVIFS;
659 cache->mfc_un.res.maxvif = 0;
660 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
661
Patrick McHardy0c122952010-04-13 05:03:22 +0000662 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
663 if (VIF_EXISTS(mrt, vifi) &&
Benjamin Therycf958ae32009-01-22 04:56:16 +0000664 ttls[vifi] && ttls[vifi] < 255) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
666 if (cache->mfc_un.res.minvif > vifi)
667 cache->mfc_un.res.minvif = vifi;
668 if (cache->mfc_un.res.maxvif <= vifi)
669 cache->mfc_un.res.maxvif = vifi + 1;
670 }
671 }
672}
673
Patrick McHardy0c122952010-04-13 05:03:22 +0000674static int vif_add(struct net *net, struct mr_table *mrt,
675 struct vifctl *vifc, int mrtsock)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676{
677 int vifi = vifc->vifc_vifi;
Patrick McHardy0c122952010-04-13 05:03:22 +0000678 struct vif_device *v = &mrt->vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679 struct net_device *dev;
680 struct in_device *in_dev;
Wang Chend6070322008-07-14 20:55:26 -0700681 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682
683 /* Is vif busy ? */
Patrick McHardy0c122952010-04-13 05:03:22 +0000684 if (VIF_EXISTS(mrt, vifi))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 return -EADDRINUSE;
686
687 switch (vifc->vifc_flags) {
688#ifdef CONFIG_IP_PIMSM
689 case VIFF_REGISTER:
690 /*
691 * Special Purpose VIF in PIM
692 * All the packets will be sent to the daemon
693 */
Patrick McHardy0c122952010-04-13 05:03:22 +0000694 if (mrt->mroute_reg_vif_num >= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 return -EADDRINUSE;
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000696 dev = ipmr_reg_vif(net, mrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697 if (!dev)
698 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700699 err = dev_set_allmulti(dev, 1);
700 if (err) {
701 unregister_netdevice(dev);
Wang Chen7dc00c82008-07-14 20:56:34 -0700702 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700703 return err;
704 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705 break;
706#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900707 case VIFF_TUNNEL:
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000708 dev = ipmr_new_tunnel(net, vifc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 if (!dev)
710 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700711 err = dev_set_allmulti(dev, 1);
712 if (err) {
713 ipmr_del_tunnel(dev, vifc);
Wang Chen7dc00c82008-07-14 20:56:34 -0700714 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700715 return err;
716 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717 break;
Ilia Kee5e81f2009-09-16 05:53:07 +0000718
719 case VIFF_USE_IFINDEX:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720 case 0:
Ilia Kee5e81f2009-09-16 05:53:07 +0000721 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
722 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
723 if (dev && dev->ip_ptr == NULL) {
724 dev_put(dev);
725 return -EADDRNOTAVAIL;
726 }
727 } else
728 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
729
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 if (!dev)
731 return -EADDRNOTAVAIL;
Wang Chend6070322008-07-14 20:55:26 -0700732 err = dev_set_allmulti(dev, 1);
Wang Chen7dc00c82008-07-14 20:56:34 -0700733 if (err) {
734 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700735 return err;
Wang Chen7dc00c82008-07-14 20:56:34 -0700736 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737 break;
738 default:
739 return -EINVAL;
740 }
741
Dan Carpenterd0490cf2009-11-11 02:03:54 +0000742 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
743 dev_put(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744 return -EADDRNOTAVAIL;
Dan Carpenterd0490cf2009-11-11 02:03:54 +0000745 }
Herbert Xu42f811b2007-06-04 23:34:44 -0700746 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747 ip_rt_multicast_event(in_dev);
748
749 /*
750 * Fill in the VIF structures
751 */
Jianjun Kongc354e122008-11-03 00:28:02 -0800752 v->rate_limit = vifc->vifc_rate_limit;
753 v->local = vifc->vifc_lcl_addr.s_addr;
754 v->remote = vifc->vifc_rmt_addr.s_addr;
755 v->flags = vifc->vifc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756 if (!mrtsock)
757 v->flags |= VIFF_STATIC;
Jianjun Kongc354e122008-11-03 00:28:02 -0800758 v->threshold = vifc->vifc_threshold;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759 v->bytes_in = 0;
760 v->bytes_out = 0;
761 v->pkt_in = 0;
762 v->pkt_out = 0;
763 v->link = dev->ifindex;
764 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
765 v->link = dev->iflink;
766
767 /* And finish update writing critical data */
768 write_lock_bh(&mrt_lock);
Jianjun Kongc354e122008-11-03 00:28:02 -0800769 v->dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770#ifdef CONFIG_IP_PIMSM
771 if (v->flags&VIFF_REGISTER)
Patrick McHardy0c122952010-04-13 05:03:22 +0000772 mrt->mroute_reg_vif_num = vifi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773#endif
Patrick McHardy0c122952010-04-13 05:03:22 +0000774 if (vifi+1 > mrt->maxvif)
775 mrt->maxvif = vifi+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776 write_unlock_bh(&mrt_lock);
777 return 0;
778}
779
Patrick McHardy0c122952010-04-13 05:03:22 +0000780static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000781 __be32 origin,
782 __be32 mcastgrp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783{
Jianjun Kongc354e122008-11-03 00:28:02 -0800784 int line = MFC_HASH(mcastgrp, origin);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785 struct mfc_cache *c;
786
Patrick McHardy0c122952010-04-13 05:03:22 +0000787 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
Patrick McHardy862465f2010-04-13 05:03:21 +0000788 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
789 return c;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790 }
Patrick McHardy862465f2010-04-13 05:03:21 +0000791 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792}
793
794/*
795 * Allocate a multicast cache entry
796 */
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000797static struct mfc_cache *ipmr_cache_alloc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798{
Jianjun Kongc354e122008-11-03 00:28:02 -0800799 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
800 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802 c->mfc_un.res.minvif = MAXVIFS;
803 return c;
804}
805
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000806static struct mfc_cache *ipmr_cache_alloc_unres(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807{
Jianjun Kongc354e122008-11-03 00:28:02 -0800808 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
809 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811 skb_queue_head_init(&c->mfc_un.unres.unresolved);
812 c->mfc_un.unres.expires = jiffies + 10*HZ;
813 return c;
814}
815
816/*
817 * A cache entry has gone into a resolved state from queued
818 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900819
Patrick McHardy0c122952010-04-13 05:03:22 +0000820static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
821 struct mfc_cache *uc, struct mfc_cache *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822{
823 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700824 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825
826 /*
827 * Play the pending entries through our router
828 */
829
Jianjun Kongc354e122008-11-03 00:28:02 -0800830 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700831 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
833
Patrick McHardycb6a4e42010-04-26 16:02:08 +0200834 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700835 nlh->nlmsg_len = (skb_tail_pointer(skb) -
836 (u8 *)nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837 } else {
838 nlh->nlmsg_type = NLMSG_ERROR;
839 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
840 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700841 e = NLMSG_DATA(nlh);
842 e->error = -EMSGSIZE;
843 memset(&e->msg, 0, sizeof(e->msg));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844 }
Thomas Graf2942e902006-08-15 00:30:25 -0700845
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000846 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847 } else
Patrick McHardy0c122952010-04-13 05:03:22 +0000848 ip_mr_forward(net, mrt, skb, c, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849 }
850}
851
852/*
853 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
854 * expects the following bizarre scheme.
855 *
856 * Called under mrt_lock.
857 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900858
Patrick McHardy0c122952010-04-13 05:03:22 +0000859static int ipmr_cache_report(struct mr_table *mrt,
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000860 struct sk_buff *pkt, vifi_t vifi, int assert)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861{
862 struct sk_buff *skb;
Arnaldo Carvalho de Meloc9bdd4b2007-03-12 20:09:15 -0300863 const int ihl = ip_hdrlen(pkt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864 struct igmphdr *igmp;
865 struct igmpmsg *msg;
866 int ret;
867
868#ifdef CONFIG_IP_PIMSM
869 if (assert == IGMPMSG_WHOLEPKT)
870 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
871 else
872#endif
873 skb = alloc_skb(128, GFP_ATOMIC);
874
Stephen Hemminger132adf52007-03-08 20:44:43 -0800875 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876 return -ENOBUFS;
877
878#ifdef CONFIG_IP_PIMSM
879 if (assert == IGMPMSG_WHOLEPKT) {
880 /* Ugly, but we have no choice with this interface.
881 Duplicate old header, fix ihl, length etc.
882 And all this only to mangle msg->im_msgtype and
883 to set msg->im_mbz to "mbz" :-)
884 */
Arnaldo Carvalho de Melo878c8142007-03-11 22:38:29 -0300885 skb_push(skb, sizeof(struct iphdr));
886 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -0300887 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melo0272ffc2007-03-12 20:05:39 -0300888 msg = (struct igmpmsg *)skb_network_header(skb);
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -0700889 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890 msg->im_msgtype = IGMPMSG_WHOLEPKT;
891 msg->im_mbz = 0;
Patrick McHardy0c122952010-04-13 05:03:22 +0000892 msg->im_vif = mrt->mroute_reg_vif_num;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700893 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
894 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
895 sizeof(struct iphdr));
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900896 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900898 {
899
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900 /*
901 * Copy the IP header
902 */
903
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700904 skb->network_header = skb->tail;
Arnaldo Carvalho de Meloddc7b8e2007-03-15 21:42:27 -0300905 skb_put(skb, ihl);
Arnaldo Carvalho de Melo27d7ff42007-03-31 11:55:19 -0300906 skb_copy_to_linear_data(skb, pkt->data, ihl);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700907 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
908 msg = (struct igmpmsg *)skb_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 msg->im_vif = vifi;
Eric Dumazetadf30902009-06-02 05:19:30 +0000910 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911
912 /*
913 * Add our header
914 */
915
Jianjun Kongc354e122008-11-03 00:28:02 -0800916 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700917 igmp->type =
918 msg->im_msgtype = assert;
919 igmp->code = 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700920 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700921 skb->transport_header = skb->network_header;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900922 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923
Patrick McHardy0c122952010-04-13 05:03:22 +0000924 if (mrt->mroute_sk == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 kfree_skb(skb);
926 return -EINVAL;
927 }
928
929 /*
930 * Deliver to mrouted
931 */
Patrick McHardy0c122952010-04-13 05:03:22 +0000932 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
Benjamin Thery70a269e2009-01-22 04:56:15 +0000933 if (ret < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 if (net_ratelimit())
935 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
936 kfree_skb(skb);
937 }
938
939 return ret;
940}
941
942/*
943 * Queue a packet for resolution. It gets locked cache entry!
944 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900945
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946static int
Patrick McHardy0c122952010-04-13 05:03:22 +0000947ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948{
Patrick McHardy862465f2010-04-13 05:03:21 +0000949 bool found = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950 int err;
951 struct mfc_cache *c;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700952 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953
954 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +0000955 list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
Patrick McHardye258beb2010-04-13 05:03:19 +0000956 if (c->mfc_mcastgrp == iph->daddr &&
Patrick McHardy862465f2010-04-13 05:03:21 +0000957 c->mfc_origin == iph->saddr) {
958 found = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959 break;
Patrick McHardy862465f2010-04-13 05:03:21 +0000960 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961 }
962
Patrick McHardy862465f2010-04-13 05:03:21 +0000963 if (!found) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 /*
965 * Create a new entry if allowable
966 */
967
Patrick McHardy0c122952010-04-13 05:03:22 +0000968 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000969 (c = ipmr_cache_alloc_unres()) == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970 spin_unlock_bh(&mfc_unres_lock);
971
972 kfree_skb(skb);
973 return -ENOBUFS;
974 }
975
976 /*
977 * Fill in the new cache entry
978 */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700979 c->mfc_parent = -1;
980 c->mfc_origin = iph->saddr;
981 c->mfc_mcastgrp = iph->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982
983 /*
984 * Reflect first query at mrouted.
985 */
Patrick McHardy0c122952010-04-13 05:03:22 +0000986 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000987 if (err < 0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900988 /* If the report failed throw the cache entry
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989 out - Brad Parker
990 */
991 spin_unlock_bh(&mfc_unres_lock);
992
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000993 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994 kfree_skb(skb);
995 return err;
996 }
997
Patrick McHardy0c122952010-04-13 05:03:22 +0000998 atomic_inc(&mrt->cache_resolve_queue_len);
999 list_add(&c->list, &mrt->mfc_unres_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000
David S. Miller278554b2010-05-12 00:05:35 -07001001 if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
1002 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003 }
1004
1005 /*
1006 * See if we can append the packet
1007 */
1008 if (c->mfc_un.unres.unresolved.qlen>3) {
1009 kfree_skb(skb);
1010 err = -ENOBUFS;
1011 } else {
Jianjun Kongc354e122008-11-03 00:28:02 -08001012 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013 err = 0;
1014 }
1015
1016 spin_unlock_bh(&mfc_unres_lock);
1017 return err;
1018}
1019
1020/*
1021 * MFC cache manipulation by user space mroute daemon
1022 */
1023
Patrick McHardy0c122952010-04-13 05:03:22 +00001024static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001025{
1026 int line;
Patrick McHardy862465f2010-04-13 05:03:21 +00001027 struct mfc_cache *c, *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028
Jianjun Kongc354e122008-11-03 00:28:02 -08001029 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030
Patrick McHardy0c122952010-04-13 05:03:22 +00001031 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1033 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1034 write_lock_bh(&mrt_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00001035 list_del(&c->list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036 write_unlock_bh(&mrt_lock);
1037
Benjamin Thery5c0a66f2009-01-22 04:56:17 +00001038 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039 return 0;
1040 }
1041 }
1042 return -ENOENT;
1043}
1044
Patrick McHardy0c122952010-04-13 05:03:22 +00001045static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1046 struct mfcctl *mfc, int mrtsock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047{
Patrick McHardy862465f2010-04-13 05:03:21 +00001048 bool found = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001049 int line;
Patrick McHardy862465f2010-04-13 05:03:21 +00001050 struct mfc_cache *uc, *c;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001051
Patrick McHardya50436f2010-03-17 06:04:14 +00001052 if (mfc->mfcc_parent >= MAXVIFS)
1053 return -ENFILE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001054
Jianjun Kongc354e122008-11-03 00:28:02 -08001055 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056
Patrick McHardy0c122952010-04-13 05:03:22 +00001057 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
Patrick McHardy862465f2010-04-13 05:03:21 +00001059 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1060 found = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061 break;
Patrick McHardy862465f2010-04-13 05:03:21 +00001062 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063 }
1064
Patrick McHardy862465f2010-04-13 05:03:21 +00001065 if (found) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001066 write_lock_bh(&mrt_lock);
1067 c->mfc_parent = mfc->mfcc_parent;
Patrick McHardy0c122952010-04-13 05:03:22 +00001068 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069 if (!mrtsock)
1070 c->mfc_flags |= MFC_STATIC;
1071 write_unlock_bh(&mrt_lock);
1072 return 0;
1073 }
1074
Joe Perchesf97c1e02007-12-16 13:45:43 -08001075 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001076 return -EINVAL;
1077
Patrick McHardyd658f8a2010-04-13 05:03:20 +00001078 c = ipmr_cache_alloc();
Jianjun Kongc354e122008-11-03 00:28:02 -08001079 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080 return -ENOMEM;
1081
Jianjun Kongc354e122008-11-03 00:28:02 -08001082 c->mfc_origin = mfc->mfcc_origin.s_addr;
1083 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
1084 c->mfc_parent = mfc->mfcc_parent;
Patrick McHardy0c122952010-04-13 05:03:22 +00001085 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001086 if (!mrtsock)
1087 c->mfc_flags |= MFC_STATIC;
1088
1089 write_lock_bh(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001090 list_add(&c->list, &mrt->mfc_cache_array[line]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091 write_unlock_bh(&mrt_lock);
1092
1093 /*
1094 * Check to see if we resolved a queued list. If so we
1095 * need to send on the frames and tidy up.
1096 */
Patrick McHardyb0ebb732010-04-15 13:29:28 +02001097 found = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001099 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
Patrick McHardye258beb2010-04-13 05:03:19 +00001100 if (uc->mfc_origin == c->mfc_origin &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
Patrick McHardy862465f2010-04-13 05:03:21 +00001102 list_del(&uc->list);
Patrick McHardy0c122952010-04-13 05:03:22 +00001103 atomic_dec(&mrt->cache_resolve_queue_len);
Patrick McHardyb0ebb732010-04-15 13:29:28 +02001104 found = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001105 break;
1106 }
1107 }
Patrick McHardy0c122952010-04-13 05:03:22 +00001108 if (list_empty(&mrt->mfc_unres_queue))
1109 del_timer(&mrt->ipmr_expire_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110 spin_unlock_bh(&mfc_unres_lock);
1111
Patrick McHardyb0ebb732010-04-15 13:29:28 +02001112 if (found) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001113 ipmr_cache_resolve(net, mrt, uc, c);
Benjamin Thery5c0a66f2009-01-22 04:56:17 +00001114 ipmr_cache_free(uc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115 }
1116 return 0;
1117}
1118
1119/*
1120 * Close the multicast socket, and clear the vif tables etc
1121 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001122
Patrick McHardy0c122952010-04-13 05:03:22 +00001123static void mroute_clean_tables(struct mr_table *mrt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124{
1125 int i;
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001126 LIST_HEAD(list);
Patrick McHardy862465f2010-04-13 05:03:21 +00001127 struct mfc_cache *c, *next;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001128
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129 /*
1130 * Shut down all active vif entries
1131 */
Patrick McHardy0c122952010-04-13 05:03:22 +00001132 for (i = 0; i < mrt->maxvif; i++) {
1133 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
1134 vif_delete(mrt, i, 0, &list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135 }
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001136 unregister_netdevice_many(&list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137
1138 /*
1139 * Wipe the cache
1140 */
Patrick McHardy862465f2010-04-13 05:03:21 +00001141 for (i = 0; i < MFC_LINES; i++) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001142 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
Patrick McHardy862465f2010-04-13 05:03:21 +00001143 if (c->mfc_flags&MFC_STATIC)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145 write_lock_bh(&mrt_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00001146 list_del(&c->list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 write_unlock_bh(&mrt_lock);
1148
Benjamin Thery5c0a66f2009-01-22 04:56:17 +00001149 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 }
1151 }
1152
Patrick McHardy0c122952010-04-13 05:03:22 +00001153 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001155 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
Patrick McHardy862465f2010-04-13 05:03:21 +00001156 list_del(&c->list);
Patrick McHardy0c122952010-04-13 05:03:22 +00001157 ipmr_destroy_unres(mrt, c);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158 }
1159 spin_unlock_bh(&mfc_unres_lock);
1160 }
1161}
1162
1163static void mrtsock_destruct(struct sock *sk)
1164{
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001165 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001166 struct mr_table *mrt;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001167
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168 rtnl_lock();
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001169 ipmr_for_each_table(mrt, net) {
1170 if (sk == mrt->mroute_sk) {
1171 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001173 write_lock_bh(&mrt_lock);
1174 mrt->mroute_sk = NULL;
1175 write_unlock_bh(&mrt_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001176
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001177 mroute_clean_tables(mrt);
1178 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179 }
1180 rtnl_unlock();
1181}
1182
1183/*
1184 * Socket options and virtual interface manipulation. The whole
1185 * virtual interface system is a complete heap, but unfortunately
1186 * that's how BSD mrouted happens to think. Maybe one day with a proper
1187 * MOSPF/PIM router set up we can clean this up.
1188 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001189
David S. Millerb7058842009-09-30 16:12:20 -07001190int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191{
1192 int ret;
1193 struct vifctl vif;
1194 struct mfcctl mfc;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001195 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001196 struct mr_table *mrt;
1197
1198 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1199 if (mrt == NULL)
1200 return -ENOENT;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001201
Stephen Hemminger132adf52007-03-08 20:44:43 -08001202 if (optname != MRT_INIT) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001203 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204 return -EACCES;
1205 }
1206
Stephen Hemminger132adf52007-03-08 20:44:43 -08001207 switch (optname) {
1208 case MRT_INIT:
1209 if (sk->sk_type != SOCK_RAW ||
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001210 inet_sk(sk)->inet_num != IPPROTO_IGMP)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001211 return -EOPNOTSUPP;
Jianjun Kongc354e122008-11-03 00:28:02 -08001212 if (optlen != sizeof(int))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001213 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214
Stephen Hemminger132adf52007-03-08 20:44:43 -08001215 rtnl_lock();
Patrick McHardy0c122952010-04-13 05:03:22 +00001216 if (mrt->mroute_sk) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217 rtnl_unlock();
Stephen Hemminger132adf52007-03-08 20:44:43 -08001218 return -EADDRINUSE;
1219 }
1220
1221 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1222 if (ret == 0) {
1223 write_lock_bh(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001224 mrt->mroute_sk = sk;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001225 write_unlock_bh(&mrt_lock);
1226
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001227 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001228 }
1229 rtnl_unlock();
1230 return ret;
1231 case MRT_DONE:
Patrick McHardy0c122952010-04-13 05:03:22 +00001232 if (sk != mrt->mroute_sk)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001233 return -EACCES;
1234 return ip_ra_control(sk, 0, NULL);
1235 case MRT_ADD_VIF:
1236 case MRT_DEL_VIF:
Jianjun Kongc354e122008-11-03 00:28:02 -08001237 if (optlen != sizeof(vif))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001238 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -08001239 if (copy_from_user(&vif, optval, sizeof(vif)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001240 return -EFAULT;
1241 if (vif.vifc_vifi >= MAXVIFS)
1242 return -ENFILE;
1243 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -08001244 if (optname == MRT_ADD_VIF) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001245 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001246 } else {
Patrick McHardy0c122952010-04-13 05:03:22 +00001247 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001248 }
1249 rtnl_unlock();
1250 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251
1252 /*
1253 * Manipulate the forwarding caches. These live
1254 * in a sort of kernel/user symbiosis.
1255 */
Stephen Hemminger132adf52007-03-08 20:44:43 -08001256 case MRT_ADD_MFC:
1257 case MRT_DEL_MFC:
Jianjun Kongc354e122008-11-03 00:28:02 -08001258 if (optlen != sizeof(mfc))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001259 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -08001260 if (copy_from_user(&mfc, optval, sizeof(mfc)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001261 return -EFAULT;
1262 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -08001263 if (optname == MRT_DEL_MFC)
Patrick McHardy0c122952010-04-13 05:03:22 +00001264 ret = ipmr_mfc_delete(mrt, &mfc);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001265 else
Patrick McHardy0c122952010-04-13 05:03:22 +00001266 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001267 rtnl_unlock();
1268 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001269 /*
1270 * Control PIM assert.
1271 */
Stephen Hemminger132adf52007-03-08 20:44:43 -08001272 case MRT_ASSERT:
1273 {
1274 int v;
1275 if (get_user(v,(int __user *)optval))
1276 return -EFAULT;
Patrick McHardy0c122952010-04-13 05:03:22 +00001277 mrt->mroute_do_assert = (v) ? 1 : 0;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001278 return 0;
1279 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280#ifdef CONFIG_IP_PIMSM
Stephen Hemminger132adf52007-03-08 20:44:43 -08001281 case MRT_PIM:
1282 {
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001283 int v;
1284
Stephen Hemminger132adf52007-03-08 20:44:43 -08001285 if (get_user(v,(int __user *)optval))
1286 return -EFAULT;
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001287 v = (v) ? 1 : 0;
1288
Stephen Hemminger132adf52007-03-08 20:44:43 -08001289 rtnl_lock();
1290 ret = 0;
Patrick McHardy0c122952010-04-13 05:03:22 +00001291 if (v != mrt->mroute_do_pim) {
1292 mrt->mroute_do_pim = v;
1293 mrt->mroute_do_assert = v;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294 }
Stephen Hemminger132adf52007-03-08 20:44:43 -08001295 rtnl_unlock();
1296 return ret;
1297 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298#endif
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001299#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1300 case MRT_TABLE:
1301 {
1302 u32 v;
1303
1304 if (optlen != sizeof(u32))
1305 return -EINVAL;
1306 if (get_user(v, (u32 __user *)optval))
1307 return -EFAULT;
1308 if (sk == mrt->mroute_sk)
1309 return -EBUSY;
1310
1311 rtnl_lock();
1312 ret = 0;
1313 if (!ipmr_new_table(net, v))
1314 ret = -ENOMEM;
1315 raw_sk(sk)->ipmr_table = v;
1316 rtnl_unlock();
1317 return ret;
1318 }
1319#endif
Stephen Hemminger132adf52007-03-08 20:44:43 -08001320 /*
1321 * Spurious command, or MRT_VERSION which you cannot
1322 * set.
1323 */
1324 default:
1325 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326 }
1327}
1328
1329/*
1330 * Getsock opt support for the multicast routing system.
1331 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001332
Jianjun Kongc354e122008-11-03 00:28:02 -08001333int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334{
1335 int olr;
1336 int val;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001337 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001338 struct mr_table *mrt;
1339
1340 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1341 if (mrt == NULL)
1342 return -ENOENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343
Jianjun Kongc354e122008-11-03 00:28:02 -08001344 if (optname != MRT_VERSION &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345#ifdef CONFIG_IP_PIMSM
1346 optname!=MRT_PIM &&
1347#endif
1348 optname!=MRT_ASSERT)
1349 return -ENOPROTOOPT;
1350
1351 if (get_user(olr, optlen))
1352 return -EFAULT;
1353
1354 olr = min_t(unsigned int, olr, sizeof(int));
1355 if (olr < 0)
1356 return -EINVAL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001357
Jianjun Kongc354e122008-11-03 00:28:02 -08001358 if (put_user(olr, optlen))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359 return -EFAULT;
Jianjun Kongc354e122008-11-03 00:28:02 -08001360 if (optname == MRT_VERSION)
1361 val = 0x0305;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001362#ifdef CONFIG_IP_PIMSM
Jianjun Kongc354e122008-11-03 00:28:02 -08001363 else if (optname == MRT_PIM)
Patrick McHardy0c122952010-04-13 05:03:22 +00001364 val = mrt->mroute_do_pim;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001365#endif
1366 else
Patrick McHardy0c122952010-04-13 05:03:22 +00001367 val = mrt->mroute_do_assert;
Jianjun Kongc354e122008-11-03 00:28:02 -08001368 if (copy_to_user(optval, &val, olr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 return -EFAULT;
1370 return 0;
1371}
1372
1373/*
1374 * The IP multicast ioctl support routines.
1375 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001376
Linus Torvalds1da177e2005-04-16 15:20:36 -07001377int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1378{
1379 struct sioc_sg_req sr;
1380 struct sioc_vif_req vr;
1381 struct vif_device *vif;
1382 struct mfc_cache *c;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001383 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001384 struct mr_table *mrt;
1385
1386 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1387 if (mrt == NULL)
1388 return -ENOENT;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001389
Stephen Hemminger132adf52007-03-08 20:44:43 -08001390 switch (cmd) {
1391 case SIOCGETVIFCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001392 if (copy_from_user(&vr, arg, sizeof(vr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001393 return -EFAULT;
Patrick McHardy0c122952010-04-13 05:03:22 +00001394 if (vr.vifi >= mrt->maxvif)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001395 return -EINVAL;
1396 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001397 vif = &mrt->vif_table[vr.vifi];
1398 if (VIF_EXISTS(mrt, vr.vifi)) {
Jianjun Kongc354e122008-11-03 00:28:02 -08001399 vr.icount = vif->pkt_in;
1400 vr.ocount = vif->pkt_out;
1401 vr.ibytes = vif->bytes_in;
1402 vr.obytes = vif->bytes_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001404
Jianjun Kongc354e122008-11-03 00:28:02 -08001405 if (copy_to_user(arg, &vr, sizeof(vr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001406 return -EFAULT;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001407 return 0;
1408 }
1409 read_unlock(&mrt_lock);
1410 return -EADDRNOTAVAIL;
1411 case SIOCGETSGCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001412 if (copy_from_user(&sr, arg, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001413 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414
Stephen Hemminger132adf52007-03-08 20:44:43 -08001415 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001416 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001417 if (c) {
1418 sr.pktcnt = c->mfc_un.res.pkt;
1419 sr.bytecnt = c->mfc_un.res.bytes;
1420 sr.wrong_if = c->mfc_un.res.wrong_if;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001422
Jianjun Kongc354e122008-11-03 00:28:02 -08001423 if (copy_to_user(arg, &sr, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001424 return -EFAULT;
1425 return 0;
1426 }
1427 read_unlock(&mrt_lock);
1428 return -EADDRNOTAVAIL;
1429 default:
1430 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001431 }
1432}
1433
1434
1435static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1436{
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001437 struct net_device *dev = ptr;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001438 struct net *net = dev_net(dev);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001439 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001440 struct vif_device *v;
1441 int ct;
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001442 LIST_HEAD(list);
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001443
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444 if (event != NETDEV_UNREGISTER)
1445 return NOTIFY_DONE;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001446
1447 ipmr_for_each_table(mrt, net) {
1448 v = &mrt->vif_table[0];
1449 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1450 if (v->dev == dev)
1451 vif_delete(mrt, ct, 1, &list);
1452 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453 }
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001454 unregister_netdevice_many(&list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455 return NOTIFY_DONE;
1456}
1457
1458
Jianjun Kongc354e122008-11-03 00:28:02 -08001459static struct notifier_block ip_mr_notifier = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001460 .notifier_call = ipmr_device_event,
1461};
1462
1463/*
1464 * Encapsulate a packet by attaching a valid IPIP header to it.
1465 * This avoids tunnel drivers and other mess and gives us the speed so
1466 * important for multicast video.
1467 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001468
Al Viro114c7842006-09-27 18:39:29 -07001469static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470{
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001471 struct iphdr *iph;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001472 struct iphdr *old_iph = ip_hdr(skb);
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001473
1474 skb_push(skb, sizeof(struct iphdr));
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001475 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001476 skb_reset_network_header(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001477 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001478
1479 iph->version = 4;
Arnaldo Carvalho de Meloe023dd62007-03-12 20:09:36 -03001480 iph->tos = old_iph->tos;
1481 iph->ttl = old_iph->ttl;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001482 iph->frag_off = 0;
1483 iph->daddr = daddr;
1484 iph->saddr = saddr;
1485 iph->protocol = IPPROTO_IPIP;
1486 iph->ihl = 5;
1487 iph->tot_len = htons(skb->len);
Eric Dumazetadf30902009-06-02 05:19:30 +00001488 ip_select_ident(iph, skb_dst(skb), NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001489 ip_send_check(iph);
1490
Linus Torvalds1da177e2005-04-16 15:20:36 -07001491 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1492 nf_reset(skb);
1493}
1494
1495static inline int ipmr_forward_finish(struct sk_buff *skb)
1496{
1497 struct ip_options * opt = &(IPCB(skb)->opt);
1498
Eric Dumazetadf30902009-06-02 05:19:30 +00001499 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500
1501 if (unlikely(opt->optlen))
1502 ip_forward_options(skb);
1503
1504 return dst_output(skb);
1505}
1506
1507/*
1508 * Processing handlers for ipmr_forward
1509 */
1510
Patrick McHardy0c122952010-04-13 05:03:22 +00001511static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1512 struct sk_buff *skb, struct mfc_cache *c, int vifi)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001513{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001514 const struct iphdr *iph = ip_hdr(skb);
Patrick McHardy0c122952010-04-13 05:03:22 +00001515 struct vif_device *vif = &mrt->vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516 struct net_device *dev;
1517 struct rtable *rt;
1518 int encap = 0;
1519
1520 if (vif->dev == NULL)
1521 goto out_free;
1522
1523#ifdef CONFIG_IP_PIMSM
1524 if (vif->flags & VIFF_REGISTER) {
1525 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001526 vif->bytes_out += skb->len;
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001527 vif->dev->stats.tx_bytes += skb->len;
1528 vif->dev->stats.tx_packets++;
Patrick McHardy0c122952010-04-13 05:03:22 +00001529 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
Ilpo Järvinen69ebbf52009-02-06 23:46:51 -08001530 goto out_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001531 }
1532#endif
1533
1534 if (vif->flags&VIFF_TUNNEL) {
1535 struct flowi fl = { .oif = vif->link,
1536 .nl_u = { .ip4_u =
1537 { .daddr = vif->remote,
1538 .saddr = vif->local,
1539 .tos = RT_TOS(iph->tos) } },
1540 .proto = IPPROTO_IPIP };
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001541 if (ip_route_output_key(net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001542 goto out_free;
1543 encap = sizeof(struct iphdr);
1544 } else {
1545 struct flowi fl = { .oif = vif->link,
1546 .nl_u = { .ip4_u =
1547 { .daddr = iph->daddr,
1548 .tos = RT_TOS(iph->tos) } },
1549 .proto = IPPROTO_IPIP };
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001550 if (ip_route_output_key(net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551 goto out_free;
1552 }
1553
1554 dev = rt->u.dst.dev;
1555
1556 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1557 /* Do not fragment multicasts. Alas, IPv4 does not
1558 allow to send ICMP, so that packets will disappear
1559 to blackhole.
1560 */
1561
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -07001562 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001563 ip_rt_put(rt);
1564 goto out_free;
1565 }
1566
1567 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1568
1569 if (skb_cow(skb, encap)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001570 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001571 goto out_free;
1572 }
1573
1574 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001575 vif->bytes_out += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576
Eric Dumazetadf30902009-06-02 05:19:30 +00001577 skb_dst_drop(skb);
1578 skb_dst_set(skb, &rt->u.dst);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001579 ip_decrease_ttl(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580
1581 /* FIXME: forward and output firewalls used to be called here.
1582 * What do we do with netfilter? -- RR */
1583 if (vif->flags & VIFF_TUNNEL) {
1584 ip_encap(skb, vif->local, vif->remote);
1585 /* FIXME: extra output firewall step used to be here. --RR */
Pavel Emelyanov2f4c02d2008-05-21 14:16:14 -07001586 vif->dev->stats.tx_packets++;
1587 vif->dev->stats.tx_bytes += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588 }
1589
1590 IPCB(skb)->flags |= IPSKB_FORWARDED;
1591
1592 /*
1593 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1594 * not only before forwarding, but after forwarding on all output
1595 * interfaces. It is clear, if mrouter runs a multicasting
1596 * program, it should receive packets not depending to what interface
1597 * program is joined.
1598 * If we will not make it, the program will have to join on all
1599 * interfaces. On the other hand, multihoming host (or router, but
1600 * not mrouter) cannot join to more than one interface - it will
1601 * result in receiving multiple packets.
1602 */
Jan Engelhardt9bbc7682010-03-23 04:07:29 +01001603 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001604 ipmr_forward_finish);
1605 return;
1606
1607out_free:
1608 kfree_skb(skb);
1609 return;
1610}
1611
Patrick McHardy0c122952010-04-13 05:03:22 +00001612static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613{
1614 int ct;
Patrick McHardy0c122952010-04-13 05:03:22 +00001615
1616 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1617 if (mrt->vif_table[ct].dev == dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618 break;
1619 }
1620 return ct;
1621}
1622
1623/* "local" means that we should preserve one skb (for local delivery) */
1624
Patrick McHardy0c122952010-04-13 05:03:22 +00001625static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1626 struct sk_buff *skb, struct mfc_cache *cache,
1627 int local)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001628{
1629 int psend = -1;
1630 int vif, ct;
1631
1632 vif = cache->mfc_parent;
1633 cache->mfc_un.res.pkt++;
1634 cache->mfc_un.res.bytes += skb->len;
1635
1636 /*
1637 * Wrong interface: drop packet and (maybe) send PIM assert.
1638 */
Patrick McHardy0c122952010-04-13 05:03:22 +00001639 if (mrt->vif_table[vif].dev != skb->dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001640 int true_vifi;
1641
Eric Dumazet511c3f92009-06-02 05:14:27 +00001642 if (skb_rtable(skb)->fl.iif == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643 /* It is our own packet, looped back.
1644 Very complicated situation...
1645
1646 The best workaround until routing daemons will be
1647 fixed is not to redistribute packet, if it was
1648 send through wrong interface. It means, that
1649 multicast applications WILL NOT work for
1650 (S,G), which have default multicast route pointing
1651 to wrong oif. In any case, it is not a good
1652 idea to use multicasting applications on router.
1653 */
1654 goto dont_forward;
1655 }
1656
1657 cache->mfc_un.res.wrong_if++;
Patrick McHardy0c122952010-04-13 05:03:22 +00001658 true_vifi = ipmr_find_vif(mrt, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001659
Patrick McHardy0c122952010-04-13 05:03:22 +00001660 if (true_vifi >= 0 && mrt->mroute_do_assert &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001661 /* pimsm uses asserts, when switching from RPT to SPT,
1662 so that we cannot check that packet arrived on an oif.
1663 It is bad, but otherwise we would need to move pretty
1664 large chunk of pimd to kernel. Ough... --ANK
1665 */
Patrick McHardy0c122952010-04-13 05:03:22 +00001666 (mrt->mroute_do_pim ||
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001667 cache->mfc_un.res.ttls[true_vifi] < 255) &&
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001668 time_after(jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1670 cache->mfc_un.res.last_assert = jiffies;
Patrick McHardy0c122952010-04-13 05:03:22 +00001671 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672 }
1673 goto dont_forward;
1674 }
1675
Patrick McHardy0c122952010-04-13 05:03:22 +00001676 mrt->vif_table[vif].pkt_in++;
1677 mrt->vif_table[vif].bytes_in += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001678
1679 /*
1680 * Forward the frame
1681 */
1682 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001683 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 if (psend != -1) {
1685 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1686 if (skb2)
Patrick McHardy0c122952010-04-13 05:03:22 +00001687 ipmr_queue_xmit(net, mrt, skb2, cache,
1688 psend);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689 }
Jianjun Kongc354e122008-11-03 00:28:02 -08001690 psend = ct;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691 }
1692 }
1693 if (psend != -1) {
1694 if (local) {
1695 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1696 if (skb2)
Patrick McHardy0c122952010-04-13 05:03:22 +00001697 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698 } else {
Patrick McHardy0c122952010-04-13 05:03:22 +00001699 ipmr_queue_xmit(net, mrt, skb, cache, psend);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700 return 0;
1701 }
1702 }
1703
1704dont_forward:
1705 if (!local)
1706 kfree_skb(skb);
1707 return 0;
1708}
1709
1710
1711/*
1712 * Multicast packets for forwarding arrive here
1713 */
1714
1715int ip_mr_input(struct sk_buff *skb)
1716{
1717 struct mfc_cache *cache;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001718 struct net *net = dev_net(skb->dev);
Eric Dumazet511c3f92009-06-02 05:14:27 +00001719 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001720 struct mr_table *mrt;
1721 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001722
1723 /* Packet is looped back after forward, it should not be
1724 forwarded second time, but still can be delivered locally.
1725 */
1726 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1727 goto dont_forward;
1728
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001729 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
1730 if (err < 0)
1731 return err;
1732
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733 if (!local) {
1734 if (IPCB(skb)->opt.router_alert) {
1735 if (ip_call_ra_chain(skb))
1736 return 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001737 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738 /* IGMPv1 (and broken IGMPv2 implementations sort of
1739 Cisco IOS <= 11.2(8)) do not put router alert
1740 option to IGMP packets destined to routable
1741 groups. It is very bad, because it means
1742 that we can forward NO IGMP messages.
1743 */
1744 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001745 if (mrt->mroute_sk) {
Patrick McHardy2715bcf2005-06-21 14:06:24 -07001746 nf_reset(skb);
Patrick McHardy0c122952010-04-13 05:03:22 +00001747 raw_rcv(mrt->mroute_sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748 read_unlock(&mrt_lock);
1749 return 0;
1750 }
1751 read_unlock(&mrt_lock);
1752 }
1753 }
1754
1755 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001756 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757
1758 /*
1759 * No usable cache entry
1760 */
Jianjun Kongc354e122008-11-03 00:28:02 -08001761 if (cache == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762 int vif;
1763
1764 if (local) {
1765 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1766 ip_local_deliver(skb);
1767 if (skb2 == NULL) {
1768 read_unlock(&mrt_lock);
1769 return -ENOBUFS;
1770 }
1771 skb = skb2;
1772 }
1773
Patrick McHardy0c122952010-04-13 05:03:22 +00001774 vif = ipmr_find_vif(mrt, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775 if (vif >= 0) {
Eric Dumazet0eae88f2010-04-20 19:06:52 -07001776 int err2 = ipmr_cache_unresolved(mrt, vif, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 read_unlock(&mrt_lock);
1778
Eric Dumazet0eae88f2010-04-20 19:06:52 -07001779 return err2;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780 }
1781 read_unlock(&mrt_lock);
1782 kfree_skb(skb);
1783 return -ENODEV;
1784 }
1785
Patrick McHardy0c122952010-04-13 05:03:22 +00001786 ip_mr_forward(net, mrt, skb, cache, local);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001787
1788 read_unlock(&mrt_lock);
1789
1790 if (local)
1791 return ip_local_deliver(skb);
1792
1793 return 0;
1794
1795dont_forward:
1796 if (local)
1797 return ip_local_deliver(skb);
1798 kfree_skb(skb);
1799 return 0;
1800}
1801
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001802#ifdef CONFIG_IP_PIMSM
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001803static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1804 unsigned int pimlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001805{
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001806 struct net_device *reg_dev = NULL;
1807 struct iphdr *encap;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001808
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001809 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001810 /*
1811 Check that:
1812 a. packet is really destinted to a multicast group
1813 b. packet is not a NULL-REGISTER
1814 c. packet is not truncated
1815 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001816 if (!ipv4_is_multicast(encap->daddr) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001817 encap->tot_len == 0 ||
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001818 ntohs(encap->tot_len) + pimlen > skb->len)
1819 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820
1821 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001822 if (mrt->mroute_reg_vif_num >= 0)
1823 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824 if (reg_dev)
1825 dev_hold(reg_dev);
1826 read_unlock(&mrt_lock);
1827
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001828 if (reg_dev == NULL)
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001829 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001831 skb->mac_header = skb->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001832 skb_pull(skb, (u8*)encap - skb->data);
Arnaldo Carvalho de Melo31c77112007-03-10 19:04:55 -03001833 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001834 skb->dev = reg_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001835 skb->protocol = htons(ETH_P_IP);
1836 skb->ip_summed = 0;
1837 skb->pkt_type = PACKET_HOST;
Eric Dumazetadf30902009-06-02 05:19:30 +00001838 skb_dst_drop(skb);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001839 reg_dev->stats.rx_bytes += skb->len;
1840 reg_dev->stats.rx_packets++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001841 nf_reset(skb);
1842 netif_rx(skb);
1843 dev_put(reg_dev);
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001844
Linus Torvalds1da177e2005-04-16 15:20:36 -07001845 return 0;
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001846}
1847#endif
1848
1849#ifdef CONFIG_IP_PIMSM_V1
1850/*
1851 * Handle IGMP messages of PIMv1
1852 */
1853
1854int pim_rcv_v1(struct sk_buff * skb)
1855{
1856 struct igmphdr *pim;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001857 struct net *net = dev_net(skb->dev);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001858 struct mr_table *mrt;
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001859
1860 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1861 goto drop;
1862
1863 pim = igmp_hdr(skb);
1864
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001865 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1866 goto drop;
1867
Patrick McHardy0c122952010-04-13 05:03:22 +00001868 if (!mrt->mroute_do_pim ||
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001869 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1870 goto drop;
1871
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001872 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001873drop:
1874 kfree_skb(skb);
1875 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001876 return 0;
1877}
1878#endif
1879
1880#ifdef CONFIG_IP_PIMSM_V2
1881static int pim_rcv(struct sk_buff * skb)
1882{
1883 struct pimreghdr *pim;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001884 struct net *net = dev_net(skb->dev);
1885 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001886
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001887 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001888 goto drop;
1889
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -07001890 pim = (struct pimreghdr *)skb_transport_header(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001891 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001892 (pim->flags&PIM_NULL_REGISTER) ||
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001893 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
Al Virod3bc23e2006-11-14 21:24:49 -08001894 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001895 goto drop;
1896
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001897 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1898 goto drop;
1899
1900 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001901drop:
1902 kfree_skb(skb);
1903 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904 return 0;
1905}
1906#endif
1907
Patrick McHardycb6a4e42010-04-26 16:02:08 +02001908static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1909 struct mfc_cache *c, struct rtmsg *rtm)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001910{
1911 int ct;
1912 struct rtnexthop *nhp;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001913 u8 *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914 struct rtattr *mp_head;
1915
Nicolas Dichtel74381892010-03-25 23:45:35 +00001916 /* If cache is unresolved, don't try to parse IIF and OIF */
1917 if (c->mfc_parent > MAXVIFS)
1918 return -ENOENT;
1919
Patrick McHardy0c122952010-04-13 05:03:22 +00001920 if (VIF_EXISTS(mrt, c->mfc_parent))
1921 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001922
Jianjun Kongc354e122008-11-03 00:28:02 -08001923 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001924
1925 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001926 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1928 goto rtattr_failure;
Jianjun Kongc354e122008-11-03 00:28:02 -08001929 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001930 nhp->rtnh_flags = 0;
1931 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
Patrick McHardy0c122952010-04-13 05:03:22 +00001932 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933 nhp->rtnh_len = sizeof(*nhp);
1934 }
1935 }
1936 mp_head->rta_type = RTA_MULTIPATH;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001937 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001938 rtm->rtm_type = RTN_MULTICAST;
1939 return 1;
1940
1941rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001942 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001943 return -EMSGSIZE;
1944}
1945
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001946int ipmr_get_route(struct net *net,
1947 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001948{
1949 int err;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001950 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001951 struct mfc_cache *cache;
Eric Dumazet511c3f92009-06-02 05:14:27 +00001952 struct rtable *rt = skb_rtable(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001953
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001954 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1955 if (mrt == NULL)
1956 return -ENOENT;
1957
Linus Torvalds1da177e2005-04-16 15:20:36 -07001958 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001959 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960
Jianjun Kongc354e122008-11-03 00:28:02 -08001961 if (cache == NULL) {
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001962 struct sk_buff *skb2;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001963 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001964 struct net_device *dev;
1965 int vif;
1966
1967 if (nowait) {
1968 read_unlock(&mrt_lock);
1969 return -EAGAIN;
1970 }
1971
1972 dev = skb->dev;
Patrick McHardy0c122952010-04-13 05:03:22 +00001973 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001974 read_unlock(&mrt_lock);
1975 return -ENODEV;
1976 }
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001977 skb2 = skb_clone(skb, GFP_ATOMIC);
1978 if (!skb2) {
1979 read_unlock(&mrt_lock);
1980 return -ENOMEM;
1981 }
1982
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -07001983 skb_push(skb2, sizeof(struct iphdr));
1984 skb_reset_network_header(skb2);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001985 iph = ip_hdr(skb2);
1986 iph->ihl = sizeof(struct iphdr) >> 2;
1987 iph->saddr = rt->rt_src;
1988 iph->daddr = rt->rt_dst;
1989 iph->version = 0;
Patrick McHardy0c122952010-04-13 05:03:22 +00001990 err = ipmr_cache_unresolved(mrt, vif, skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991 read_unlock(&mrt_lock);
1992 return err;
1993 }
1994
1995 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1996 cache->mfc_flags |= MFC_NOTIFY;
Patrick McHardycb6a4e42010-04-26 16:02:08 +02001997 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001998 read_unlock(&mrt_lock);
1999 return err;
2000}
2001
Patrick McHardycb6a4e42010-04-26 16:02:08 +02002002static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2003 u32 pid, u32 seq, struct mfc_cache *c)
2004{
2005 struct nlmsghdr *nlh;
2006 struct rtmsg *rtm;
2007
2008 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2009 if (nlh == NULL)
2010 return -EMSGSIZE;
2011
2012 rtm = nlmsg_data(nlh);
2013 rtm->rtm_family = RTNL_FAMILY_IPMR;
2014 rtm->rtm_dst_len = 32;
2015 rtm->rtm_src_len = 32;
2016 rtm->rtm_tos = 0;
2017 rtm->rtm_table = mrt->id;
2018 NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2019 rtm->rtm_type = RTN_MULTICAST;
2020 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2021 rtm->rtm_protocol = RTPROT_UNSPEC;
2022 rtm->rtm_flags = 0;
2023
2024 NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
2025 NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);
2026
2027 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
2028 goto nla_put_failure;
2029
2030 return nlmsg_end(skb, nlh);
2031
2032nla_put_failure:
2033 nlmsg_cancel(skb, nlh);
2034 return -EMSGSIZE;
2035}
2036
2037static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2038{
2039 struct net *net = sock_net(skb->sk);
2040 struct mr_table *mrt;
2041 struct mfc_cache *mfc;
2042 unsigned int t = 0, s_t;
2043 unsigned int h = 0, s_h;
2044 unsigned int e = 0, s_e;
2045
2046 s_t = cb->args[0];
2047 s_h = cb->args[1];
2048 s_e = cb->args[2];
2049
2050 read_lock(&mrt_lock);
2051 ipmr_for_each_table(mrt, net) {
2052 if (t < s_t)
2053 goto next_table;
2054 if (t > s_t)
2055 s_h = 0;
2056 for (h = s_h; h < MFC_LINES; h++) {
2057 list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
2058 if (e < s_e)
2059 goto next_entry;
2060 if (ipmr_fill_mroute(mrt, skb,
2061 NETLINK_CB(cb->skb).pid,
2062 cb->nlh->nlmsg_seq,
2063 mfc) < 0)
2064 goto done;
2065next_entry:
2066 e++;
2067 }
2068 e = s_e = 0;
2069 }
2070 s_h = 0;
2071next_table:
2072 t++;
2073 }
2074done:
2075 read_unlock(&mrt_lock);
2076
2077 cb->args[2] = e;
2078 cb->args[1] = h;
2079 cb->args[0] = t;
2080
2081 return skb->len;
2082}
2083
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002084#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07002085/*
2086 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
2087 */
2088struct ipmr_vif_iter {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002089 struct seq_net_private p;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002090 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002091 int ct;
2092};
2093
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002094static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2095 struct ipmr_vif_iter *iter,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002096 loff_t pos)
2097{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002098 struct mr_table *mrt = iter->mrt;
Patrick McHardy0c122952010-04-13 05:03:22 +00002099
2100 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2101 if (!VIF_EXISTS(mrt, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002102 continue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002103 if (pos-- == 0)
Patrick McHardy0c122952010-04-13 05:03:22 +00002104 return &mrt->vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002105 }
2106 return NULL;
2107}
2108
2109static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08002110 __acquires(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002111{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002112 struct ipmr_vif_iter *iter = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002113 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002114 struct mr_table *mrt;
2115
2116 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2117 if (mrt == NULL)
2118 return ERR_PTR(-ENOENT);
2119
2120 iter->mrt = mrt;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002121
Linus Torvalds1da177e2005-04-16 15:20:36 -07002122 read_lock(&mrt_lock);
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002123 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002124 : SEQ_START_TOKEN;
2125}
2126
2127static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2128{
2129 struct ipmr_vif_iter *iter = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002130 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002131 struct mr_table *mrt = iter->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002132
2133 ++*pos;
2134 if (v == SEQ_START_TOKEN)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002135 return ipmr_vif_seq_idx(net, iter, 0);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002136
Patrick McHardy0c122952010-04-13 05:03:22 +00002137 while (++iter->ct < mrt->maxvif) {
2138 if (!VIF_EXISTS(mrt, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002139 continue;
Patrick McHardy0c122952010-04-13 05:03:22 +00002140 return &mrt->vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002141 }
2142 return NULL;
2143}
2144
2145static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08002146 __releases(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002147{
2148 read_unlock(&mrt_lock);
2149}
2150
/*
 * seq_file ->show for the vif listing.
 *
 * Prints the column header for SEQ_START_TOKEN; otherwise prints one line
 * for the vif_device passed in @v: its index in mrt->vif_table, the device
 * name ("none" when no device is attached), the byte/packet counters, the
 * flags and the local/remote addresses.  Always returns 0.
 */
2151static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
2152{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002153 struct ipmr_vif_iter *iter = seq->private;
2154 struct mr_table *mrt = iter->mrt;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002155
Linus Torvalds1da177e2005-04-16 15:20:36 -07002156 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002157 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002158 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
2159 } else {
2160 const struct vif_device *vif = v;
2161 const char *name = vif->dev ? vif->dev->name : "none";
2162
/* The first column is the vif index, derived by pointer subtraction. */
2163 seq_printf(seq,
2164 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
Patrick McHardy0c122952010-04-13 05:03:22 +00002165 vif - mrt->vif_table,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002166 name, vif->bytes_in, vif->pkt_in,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002167 vif->bytes_out, vif->pkt_out,
2168 vif->flags, vif->local, vif->remote);
2169 }
2170 return 0;
2171}
2172
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002173static const struct seq_operations ipmr_vif_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002174 .start = ipmr_vif_seq_start,
2175 .next = ipmr_vif_seq_next,
2176 .stop = ipmr_vif_seq_stop,
2177 .show = ipmr_vif_seq_show,
2178};
2179
2180static int ipmr_vif_open(struct inode *inode, struct file *file)
2181{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002182 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
2183 sizeof(struct ipmr_vif_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002184}
2185
Arjan van de Ven9a321442007-02-12 00:55:35 -08002186static const struct file_operations ipmr_vif_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002187 .owner = THIS_MODULE,
2188 .open = ipmr_vif_open,
2189 .read = seq_read,
2190 .llseek = seq_lseek,
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002191 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192};
2193
2194struct ipmr_mfc_iter {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002195 struct seq_net_private p;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002196 struct mr_table *mrt;
Patrick McHardy862465f2010-04-13 05:03:21 +00002197 struct list_head *cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002198 int ct;
2199};
2200
2201
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002202static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2203 struct ipmr_mfc_iter *it, loff_t pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002204{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002205 struct mr_table *mrt = it->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002206 struct mfc_cache *mfc;
2207
Linus Torvalds1da177e2005-04-16 15:20:36 -07002208 read_lock(&mrt_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00002209 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
Patrick McHardy0c122952010-04-13 05:03:22 +00002210 it->cache = &mrt->mfc_cache_array[it->ct];
Patrick McHardy862465f2010-04-13 05:03:21 +00002211 list_for_each_entry(mfc, it->cache, list)
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002212 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002213 return mfc;
Patrick McHardy862465f2010-04-13 05:03:21 +00002214 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002215 read_unlock(&mrt_lock);
2216
Linus Torvalds1da177e2005-04-16 15:20:36 -07002217 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00002218 it->cache = &mrt->mfc_unres_queue;
Patrick McHardy862465f2010-04-13 05:03:21 +00002219 list_for_each_entry(mfc, it->cache, list)
Patrick McHardye258beb2010-04-13 05:03:19 +00002220 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002221 return mfc;
2222 spin_unlock_bh(&mfc_unres_lock);
2223
2224 it->cache = NULL;
2225 return NULL;
2226}
2227
2228
2229static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
2230{
2231 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002232 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002233 struct mr_table *mrt;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002234
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002235 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2236 if (mrt == NULL)
2237 return ERR_PTR(-ENOENT);
2238
2239 it->mrt = mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002240 it->cache = NULL;
2241 it->ct = 0;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002242 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002243 : SEQ_START_TOKEN;
2244}
2245
2246static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2247{
2248 struct mfc_cache *mfc = v;
2249 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002250 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002251 struct mr_table *mrt = it->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002252
2253 ++*pos;
2254
2255 if (v == SEQ_START_TOKEN)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002256 return ipmr_mfc_seq_idx(net, seq->private, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002257
Patrick McHardy862465f2010-04-13 05:03:21 +00002258 if (mfc->list.next != it->cache)
2259 return list_entry(mfc->list.next, struct mfc_cache, list);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002260
Patrick McHardy0c122952010-04-13 05:03:22 +00002261 if (it->cache == &mrt->mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262 goto end_of_list;
2263
Patrick McHardy0c122952010-04-13 05:03:22 +00002264 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002265
2266 while (++it->ct < MFC_LINES) {
Patrick McHardy0c122952010-04-13 05:03:22 +00002267 it->cache = &mrt->mfc_cache_array[it->ct];
Patrick McHardy862465f2010-04-13 05:03:21 +00002268 if (list_empty(it->cache))
2269 continue;
2270 return list_first_entry(it->cache, struct mfc_cache, list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002271 }
2272
2273 /* exhausted cache_array, show unresolved */
2274 read_unlock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00002275 it->cache = &mrt->mfc_unres_queue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002276 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002277
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00002279 if (!list_empty(it->cache))
2280 return list_first_entry(it->cache, struct mfc_cache, list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002281
2282 end_of_list:
2283 spin_unlock_bh(&mfc_unres_lock);
2284 it->cache = NULL;
2285
2286 return NULL;
2287}
2288
2289static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2290{
2291 struct ipmr_mfc_iter *it = seq->private;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002292 struct mr_table *mrt = it->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002293
Patrick McHardy0c122952010-04-13 05:03:22 +00002294 if (it->cache == &mrt->mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002295 spin_unlock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00002296 else if (it->cache == &mrt->mfc_cache_array[it->ct])
Linus Torvalds1da177e2005-04-16 15:20:36 -07002297 read_unlock(&mrt_lock);
2298}
2299
2300static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2301{
2302 int n;
2303
2304 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002305 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002306 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
2307 } else {
2308 const struct mfc_cache *mfc = v;
2309 const struct ipmr_mfc_iter *it = seq->private;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002310 const struct mr_table *mrt = it->mrt;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002311
Eric Dumazet0eae88f2010-04-20 19:06:52 -07002312 seq_printf(seq, "%08X %08X %-3hd",
2313 (__force u32) mfc->mfc_mcastgrp,
2314 (__force u32) mfc->mfc_origin,
Benjamin Thery1ea472e2008-12-03 22:21:47 -08002315 mfc->mfc_parent);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002316
Patrick McHardy0c122952010-04-13 05:03:22 +00002317 if (it->cache != &mrt->mfc_unres_queue) {
Benjamin Thery1ea472e2008-12-03 22:21:47 -08002318 seq_printf(seq, " %8lu %8lu %8lu",
2319 mfc->mfc_un.res.pkt,
2320 mfc->mfc_un.res.bytes,
2321 mfc->mfc_un.res.wrong_if);
Stephen Hemminger132adf52007-03-08 20:44:43 -08002322 for (n = mfc->mfc_un.res.minvif;
2323 n < mfc->mfc_un.res.maxvif; n++ ) {
Patrick McHardy0c122952010-04-13 05:03:22 +00002324 if (VIF_EXISTS(mrt, n) &&
Benjamin Therycf958ae32009-01-22 04:56:16 +00002325 mfc->mfc_un.res.ttls[n] < 255)
2326 seq_printf(seq,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002327 " %2d:%-3d",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328 n, mfc->mfc_un.res.ttls[n]);
2329 }
Benjamin Thery1ea472e2008-12-03 22:21:47 -08002330 } else {
2331 /* unresolved mfc_caches don't contain
2332 * pkt, bytes and wrong_if values
2333 */
2334 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002335 }
2336 seq_putc(seq, '\n');
2337 }
2338 return 0;
2339}
2340
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002341static const struct seq_operations ipmr_mfc_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002342 .start = ipmr_mfc_seq_start,
2343 .next = ipmr_mfc_seq_next,
2344 .stop = ipmr_mfc_seq_stop,
2345 .show = ipmr_mfc_seq_show,
2346};
2347
2348static int ipmr_mfc_open(struct inode *inode, struct file *file)
2349{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002350 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
2351 sizeof(struct ipmr_mfc_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352}
2353
Arjan van de Ven9a321442007-02-12 00:55:35 -08002354static const struct file_operations ipmr_mfc_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002355 .owner = THIS_MODULE,
2356 .open = ipmr_mfc_open,
2357 .read = seq_read,
2358 .llseek = seq_lseek,
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002359 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002360};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002361#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002362
2363#ifdef CONFIG_IP_PIMSM_V2
Alexey Dobriyan32613092009-09-14 12:21:47 +00002364static const struct net_protocol pim_protocol = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002365 .handler = pim_rcv,
Tom Goff403dbb92009-06-14 03:16:13 -07002366 .netns_ok = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002367};
2368#endif
2369
2370
2371/*
2372 * Setup for IP multicast routing
2373 */
Benjamin Therycf958ae32009-01-22 04:56:16 +00002374static int __net_init ipmr_net_init(struct net *net)
2375{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002376 int err;
Benjamin Therycf958ae32009-01-22 04:56:16 +00002377
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002378 err = ipmr_rules_init(net);
2379 if (err < 0)
Benjamin Therycf958ae32009-01-22 04:56:16 +00002380 goto fail;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002381
2382#ifdef CONFIG_PROC_FS
2383 err = -ENOMEM;
2384 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2385 goto proc_vif_fail;
2386 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2387 goto proc_cache_fail;
2388#endif
Benjamin Thery2bb8b262009-01-22 04:56:18 +00002389 return 0;
2390
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002391#ifdef CONFIG_PROC_FS
2392proc_cache_fail:
2393 proc_net_remove(net, "ip_mr_vif");
2394proc_vif_fail:
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002395 ipmr_rules_exit(net);
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002396#endif
Benjamin Therycf958ae32009-01-22 04:56:16 +00002397fail:
2398 return err;
2399}
2400
2401static void __net_exit ipmr_net_exit(struct net *net)
2402{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002403#ifdef CONFIG_PROC_FS
2404 proc_net_remove(net, "ip_mr_cache");
2405 proc_net_remove(net, "ip_mr_vif");
2406#endif
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002407 ipmr_rules_exit(net);
Benjamin Therycf958ae32009-01-22 04:56:16 +00002408}
2409
2410static struct pernet_operations ipmr_net_ops = {
2411 .init = ipmr_net_init,
2412 .exit = ipmr_net_exit,
2413};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002414
Wang Chen03d2f892008-07-03 12:13:36 +08002415int __init ip_mr_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002416{
Wang Chen03d2f892008-07-03 12:13:36 +08002417 int err;
2418
Linus Torvalds1da177e2005-04-16 15:20:36 -07002419 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2420 sizeof(struct mfc_cache),
Alexey Dobriyane5d679f2006-08-26 19:25:52 -07002421 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
Paul Mundt20c2df82007-07-20 10:11:58 +09002422 NULL);
Wang Chen03d2f892008-07-03 12:13:36 +08002423 if (!mrt_cachep)
2424 return -ENOMEM;
2425
Benjamin Therycf958ae32009-01-22 04:56:16 +00002426 err = register_pernet_subsys(&ipmr_net_ops);
2427 if (err)
2428 goto reg_pernet_fail;
2429
Wang Chen03d2f892008-07-03 12:13:36 +08002430 err = register_netdevice_notifier(&ip_mr_notifier);
2431 if (err)
2432 goto reg_notif_fail;
Tom Goff403dbb92009-06-14 03:16:13 -07002433#ifdef CONFIG_IP_PIMSM_V2
2434 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2435 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2436 err = -EAGAIN;
2437 goto add_proto_fail;
2438 }
2439#endif
Patrick McHardycb6a4e42010-04-26 16:02:08 +02002440 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
Wang Chen03d2f892008-07-03 12:13:36 +08002441 return 0;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002442
Tom Goff403dbb92009-06-14 03:16:13 -07002443#ifdef CONFIG_IP_PIMSM_V2
2444add_proto_fail:
2445 unregister_netdevice_notifier(&ip_mr_notifier);
2446#endif
Benjamin Theryc3e38892008-11-19 14:07:41 -08002447reg_notif_fail:
Benjamin Therycf958ae32009-01-22 04:56:16 +00002448 unregister_pernet_subsys(&ipmr_net_ops);
2449reg_pernet_fail:
Benjamin Theryc3e38892008-11-19 14:07:41 -08002450 kmem_cache_destroy(mrt_cachep);
Wang Chen03d2f892008-07-03 12:13:36 +08002451 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002452}