blob: 5df5fd74c6d187f4175b3028ccb690c318664b76 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
Alan Cox113aa832008-10-13 19:01:08 -07004 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
Linus Torvalds1da177e2005-04-16 15:20:36 -07005 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070012 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
26 *
27 */
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080032#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080049#include <linux/if_ether.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090050#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020051#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <net/ip.h>
53#include <net/protocol.h>
54#include <linux/skbuff.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020055#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070056#include <net/sock.h>
57#include <net/icmp.h>
58#include <net/udp.h>
59#include <net/raw.h>
60#include <linux/notifier.h>
61#include <linux/if_arp.h>
62#include <linux/netfilter_ipv4.h>
63#include <net/ipip.h>
64#include <net/checksum.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070065#include <net/netlink.h>
Patrick McHardyf0ad0862010-04-13 05:03:23 +000066#include <net/fib_rules.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070067
/* Either PIM-SM version switches on the shared CONFIG_IP_PIMSM code paths. */
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif
71
Patrick McHardy0c122952010-04-13 05:03:22 +000072struct mr_table {
Patrick McHardyf0ad0862010-04-13 05:03:23 +000073 struct list_head list;
74 u32 id;
Patrick McHardy0c122952010-04-13 05:03:22 +000075 struct sock *mroute_sk;
76 struct timer_list ipmr_expire_timer;
77 struct list_head mfc_unres_queue;
78 struct list_head mfc_cache_array[MFC_LINES];
79 struct vif_device vif_table[MAXVIFS];
80 int maxvif;
81 atomic_t cache_resolve_queue_len;
82 int mroute_do_assert;
83 int mroute_do_pim;
84#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
85 int mroute_reg_vif_num;
86#endif
87};
88
Patrick McHardyf0ad0862010-04-13 05:03:23 +000089struct ipmr_rule {
90 struct fib_rule common;
91};
92
/* Output of a rule lookup: the table selected by ipmr_rule_action(). */
struct ipmr_result {
	struct mr_table		*mrt;
};
96
/*
 * Big lock, protecting the vif table, the mrt cache and the mroute socket
 * state.  Note that changes are additionally serialised via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);
102
103/*
104 * Multicast router control variables
105 */
106
/* True when slot _idx of _mrt's vif table has a device attached. */
#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[(_idx)].dev != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108
/* Dedicated spinlock for the queue of unresolved entries. */
static DEFINE_SPINLOCK(mfc_unres_lock);
111
112/* We return to original Alan's scheme. Hash table of resolved
113 entries is changed only in process context and protected
114 with weak lock mrt_lock. Queue of unresolved entries is protected
115 with strong spinlock mfc_unres_lock.
116
117 In this case data path is free of exclusive locks at all.
118 */
119
Christoph Lametere18b8902006-12-06 20:33:20 -0800120static struct kmem_cache *mrt_cachep __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000122static struct mr_table *ipmr_new_table(struct net *net, u32 id);
Patrick McHardy0c122952010-04-13 05:03:22 +0000123static int ip_mr_forward(struct net *net, struct mr_table *mrt,
124 struct sk_buff *skb, struct mfc_cache *cache,
125 int local);
126static int ipmr_cache_report(struct mr_table *mrt,
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000127 struct sk_buff *pkt, vifi_t vifi, int assert);
Patrick McHardy0c122952010-04-13 05:03:22 +0000128static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000129 struct mfc_cache *c, struct rtmsg *rtm);
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000130static void ipmr_expire_process(unsigned long arg);
131
132#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
/* Walk every multicast routing table registered in @net. */
#define ipmr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &(net)->ipv4.mr_tables, list)
135
136static struct mr_table *ipmr_get_table(struct net *net, u32 id)
137{
138 struct mr_table *mrt;
139
140 ipmr_for_each_table(mrt, net) {
141 if (mrt->id == id)
142 return mrt;
143 }
144 return NULL;
145}
146
147static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
148 struct mr_table **mrt)
149{
150 struct ipmr_result res;
151 struct fib_lookup_arg arg = { .result = &res, };
152 int err;
153
154 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
155 if (err < 0)
156 return err;
157 *mrt = res.mrt;
158 return 0;
159}
160
161static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
162 int flags, struct fib_lookup_arg *arg)
163{
164 struct ipmr_result *res = arg->result;
165 struct mr_table *mrt;
166
167 switch (rule->action) {
168 case FR_ACT_TO_TBL:
169 break;
170 case FR_ACT_UNREACHABLE:
171 return -ENETUNREACH;
172 case FR_ACT_PROHIBIT:
173 return -EACCES;
174 case FR_ACT_BLACKHOLE:
175 default:
176 return -EINVAL;
177 }
178
179 mrt = ipmr_get_table(rule->fr_net, rule->table);
180 if (mrt == NULL)
181 return -EAGAIN;
182 res->mrt = mrt;
183 return 0;
184}
185
/* fib_rules "match" callback: unconditionally reports a match (1). */
static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}
190
191static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
192 FRA_GENERIC_POLICY,
193};
194
/* fib_rules "configure" callback: nothing rule-specific to set up. */
static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}
200
/* fib_rules "compare" callback: always returns 1. */
static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}
206
207static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
208 struct fib_rule_hdr *frh)
209{
210 frh->dst_len = 0;
211 frh->src_len = 0;
212 frh->tos = 0;
213 return 0;
214}
215
216static struct fib_rules_ops ipmr_rules_ops_template = {
217 .family = FIB_RULES_IPMR,
218 .rule_size = sizeof(struct ipmr_rule),
219 .addr_size = sizeof(u32),
220 .action = ipmr_rule_action,
221 .match = ipmr_rule_match,
222 .configure = ipmr_rule_configure,
223 .compare = ipmr_rule_compare,
224 .default_pref = fib_default_rule_pref,
225 .fill = ipmr_rule_fill,
226 .nlgroup = RTNLGRP_IPV4_RULE,
227 .policy = ipmr_rule_policy,
228 .owner = THIS_MODULE,
229};
230
231static int __net_init ipmr_rules_init(struct net *net)
232{
233 struct fib_rules_ops *ops;
234 struct mr_table *mrt;
235 int err;
236
237 ops = fib_rules_register(&ipmr_rules_ops_template, net);
238 if (IS_ERR(ops))
239 return PTR_ERR(ops);
240
241 INIT_LIST_HEAD(&net->ipv4.mr_tables);
242
243 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
244 if (mrt == NULL) {
245 err = -ENOMEM;
246 goto err1;
247 }
248
249 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
250 if (err < 0)
251 goto err2;
252
253 net->ipv4.mr_rules_ops = ops;
254 return 0;
255
256err2:
257 kfree(mrt);
258err1:
259 fib_rules_unregister(ops);
260 return err;
261}
262
263static void __net_exit ipmr_rules_exit(struct net *net)
264{
265 struct mr_table *mrt, *next;
266
267 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
268 kfree(mrt);
269 fib_rules_unregister(net->ipv4.mr_rules_ops);
270}
271#else
/* Single-table build: visit net->ipv4.mrt once, if it is set. */
#define ipmr_for_each_table(mrt, net) \
	for ((mrt) = (net)->ipv4.mrt; (mrt) != NULL; (mrt) = NULL)
274
275static struct mr_table *ipmr_get_table(struct net *net, u32 id)
276{
277 return net->ipv4.mrt;
278}
279
280static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
281 struct mr_table **mrt)
282{
283 *mrt = net->ipv4.mrt;
284 return 0;
285}
286
287static int __net_init ipmr_rules_init(struct net *net)
288{
289 net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
290 return net->ipv4.mrt ? 0 : -ENOMEM;
291}
292
293static void __net_exit ipmr_rules_exit(struct net *net)
294{
295 kfree(net->ipv4.mrt);
296}
297#endif
298
299static struct mr_table *ipmr_new_table(struct net *net, u32 id)
300{
301 struct mr_table *mrt;
302 unsigned int i;
303
304 mrt = ipmr_get_table(net, id);
305 if (mrt != NULL)
306 return mrt;
307
308 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
309 if (mrt == NULL)
310 return NULL;
311 mrt->id = id;
312
313 /* Forwarding cache */
314 for (i = 0; i < MFC_LINES; i++)
315 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
316
317 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
318
319 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
320 (unsigned long)mrt);
321
322#ifdef CONFIG_IP_PIMSM
323 mrt->mroute_reg_vif_num = -1;
324#endif
325#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
326 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
327#endif
328 return mrt;
329}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
332
Wang Chend6070322008-07-14 20:55:26 -0700333static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
334{
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000335 struct net *net = dev_net(dev);
336
Wang Chend6070322008-07-14 20:55:26 -0700337 dev_close(dev);
338
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000339 dev = __dev_get_by_name(net, "tunl0");
Wang Chend6070322008-07-14 20:55:26 -0700340 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800341 const struct net_device_ops *ops = dev->netdev_ops;
Wang Chend6070322008-07-14 20:55:26 -0700342 struct ifreq ifr;
Wang Chend6070322008-07-14 20:55:26 -0700343 struct ip_tunnel_parm p;
344
345 memset(&p, 0, sizeof(p));
346 p.iph.daddr = v->vifc_rmt_addr.s_addr;
347 p.iph.saddr = v->vifc_lcl_addr.s_addr;
348 p.iph.version = 4;
349 p.iph.ihl = 5;
350 p.iph.protocol = IPPROTO_IPIP;
351 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
352 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
353
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800354 if (ops->ndo_do_ioctl) {
355 mm_segment_t oldfs = get_fs();
356
357 set_fs(KERNEL_DS);
358 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
359 set_fs(oldfs);
360 }
Wang Chend6070322008-07-14 20:55:26 -0700361 }
362}
363
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364static
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000365struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366{
367 struct net_device *dev;
368
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000369 dev = __dev_get_by_name(net, "tunl0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370
371 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800372 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373 int err;
374 struct ifreq ifr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375 struct ip_tunnel_parm p;
376 struct in_device *in_dev;
377
378 memset(&p, 0, sizeof(p));
379 p.iph.daddr = v->vifc_rmt_addr.s_addr;
380 p.iph.saddr = v->vifc_lcl_addr.s_addr;
381 p.iph.version = 4;
382 p.iph.ihl = 5;
383 p.iph.protocol = IPPROTO_IPIP;
384 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
Stephen Hemmingerba93ef72008-01-21 17:28:59 -0800385 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800387 if (ops->ndo_do_ioctl) {
388 mm_segment_t oldfs = get_fs();
389
390 set_fs(KERNEL_DS);
391 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
392 set_fs(oldfs);
393 } else
394 err = -EOPNOTSUPP;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395
396 dev = NULL;
397
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000398 if (err == 0 &&
399 (dev = __dev_get_by_name(net, p.name)) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700400 dev->flags |= IFF_MULTICAST;
401
Herbert Xue5ed6392005-10-03 14:35:55 -0700402 in_dev = __in_dev_get_rtnl(dev);
Herbert Xu71e27da2007-06-04 23:36:06 -0700403 if (in_dev == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700405
406 ipv4_devconf_setall(in_dev);
407 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408
409 if (dev_open(dev))
410 goto failure;
Wang Chen7dc00c82008-07-14 20:56:34 -0700411 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 }
413 }
414 return dev;
415
416failure:
417 /* allow the register to be completed before unregistering. */
418 rtnl_unlock();
419 rtnl_lock();
420
421 unregister_netdevice(dev);
422 return NULL;
423}
424
425#ifdef CONFIG_IP_PIMSM
426
Stephen Hemminger6fef4c02009-08-31 19:50:41 +0000427static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428{
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000429 struct net *net = dev_net(dev);
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000430 struct mr_table *mrt;
431 struct flowi fl = {
432 .oif = dev->ifindex,
433 .iif = skb->skb_iif,
434 .mark = skb->mark,
435 };
436 int err;
437
438 err = ipmr_fib_lookup(net, &fl, &mrt);
439 if (err < 0)
440 return err;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000441
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 read_lock(&mrt_lock);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700443 dev->stats.tx_bytes += skb->len;
444 dev->stats.tx_packets++;
Patrick McHardy0c122952010-04-13 05:03:22 +0000445 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446 read_unlock(&mrt_lock);
447 kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000448 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449}
450
Stephen Hemminger007c3832008-11-20 20:28:35 -0800451static const struct net_device_ops reg_vif_netdev_ops = {
452 .ndo_start_xmit = reg_vif_xmit,
453};
454
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455static void reg_vif_setup(struct net_device *dev)
456{
457 dev->type = ARPHRD_PIMREG;
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800458 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459 dev->flags = IFF_NOARP;
Stephen Hemminger007c3832008-11-20 20:28:35 -0800460 dev->netdev_ops = &reg_vif_netdev_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461 dev->destructor = free_netdev;
Tom Goff403dbb92009-06-14 03:16:13 -0700462 dev->features |= NETIF_F_NETNS_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463}
464
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000465static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466{
467 struct net_device *dev;
468 struct in_device *in_dev;
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000469 char name[IFNAMSIZ];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000471 if (mrt->id == RT_TABLE_DEFAULT)
472 sprintf(name, "pimreg");
473 else
474 sprintf(name, "pimreg%u", mrt->id);
475
476 dev = alloc_netdev(0, name, reg_vif_setup);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700477
478 if (dev == NULL)
479 return NULL;
480
Tom Goff403dbb92009-06-14 03:16:13 -0700481 dev_net_set(dev, net);
482
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483 if (register_netdevice(dev)) {
484 free_netdev(dev);
485 return NULL;
486 }
487 dev->iflink = 0;
488
Herbert Xu71e27da2007-06-04 23:36:06 -0700489 rcu_read_lock();
490 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
491 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700493 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494
Herbert Xu71e27da2007-06-04 23:36:06 -0700495 ipv4_devconf_setall(in_dev);
496 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
497 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498
499 if (dev_open(dev))
500 goto failure;
501
Wang Chen7dc00c82008-07-14 20:56:34 -0700502 dev_hold(dev);
503
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504 return dev;
505
506failure:
507 /* allow the register to be completed before unregistering. */
508 rtnl_unlock();
509 rtnl_lock();
510
511 unregister_netdevice(dev);
512 return NULL;
513}
514#endif
515
516/*
517 * Delete a VIF entry
Wang Chen7dc00c82008-07-14 20:56:34 -0700518 * @notify: Set to 1, if the caller is a notifier_call
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900520
Patrick McHardy0c122952010-04-13 05:03:22 +0000521static int vif_delete(struct mr_table *mrt, int vifi, int notify,
Eric Dumazetd17fa6f2009-10-28 05:21:38 +0000522 struct list_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523{
524 struct vif_device *v;
525 struct net_device *dev;
526 struct in_device *in_dev;
527
Patrick McHardy0c122952010-04-13 05:03:22 +0000528 if (vifi < 0 || vifi >= mrt->maxvif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700529 return -EADDRNOTAVAIL;
530
Patrick McHardy0c122952010-04-13 05:03:22 +0000531 v = &mrt->vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532
533 write_lock_bh(&mrt_lock);
534 dev = v->dev;
535 v->dev = NULL;
536
537 if (!dev) {
538 write_unlock_bh(&mrt_lock);
539 return -EADDRNOTAVAIL;
540 }
541
542#ifdef CONFIG_IP_PIMSM
Patrick McHardy0c122952010-04-13 05:03:22 +0000543 if (vifi == mrt->mroute_reg_vif_num)
544 mrt->mroute_reg_vif_num = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545#endif
546
Patrick McHardy0c122952010-04-13 05:03:22 +0000547 if (vifi+1 == mrt->maxvif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 int tmp;
549 for (tmp=vifi-1; tmp>=0; tmp--) {
Patrick McHardy0c122952010-04-13 05:03:22 +0000550 if (VIF_EXISTS(mrt, tmp))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551 break;
552 }
Patrick McHardy0c122952010-04-13 05:03:22 +0000553 mrt->maxvif = tmp+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554 }
555
556 write_unlock_bh(&mrt_lock);
557
558 dev_set_allmulti(dev, -1);
559
Herbert Xue5ed6392005-10-03 14:35:55 -0700560 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
Herbert Xu42f811b2007-06-04 23:34:44 -0700561 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562 ip_rt_multicast_event(in_dev);
563 }
564
Wang Chen7dc00c82008-07-14 20:56:34 -0700565 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
Eric Dumazetd17fa6f2009-10-28 05:21:38 +0000566 unregister_netdevice_queue(dev, head);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567
568 dev_put(dev);
569 return 0;
570}
571
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000572static inline void ipmr_cache_free(struct mfc_cache *c)
573{
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000574 kmem_cache_free(mrt_cachep, c);
575}
576
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577/* Destroy an unresolved cache entry, killing queued skbs
578 and reporting error to netlink readers.
579 */
580
Patrick McHardy0c122952010-04-13 05:03:22 +0000581static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582{
Patrick McHardy0c122952010-04-13 05:03:22 +0000583 struct net *net = NULL; //mrt->net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700585 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586
Patrick McHardy0c122952010-04-13 05:03:22 +0000587 atomic_dec(&mrt->cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588
Jianjun Kongc354e122008-11-03 00:28:02 -0800589 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700590 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
592 nlh->nlmsg_type = NLMSG_ERROR;
593 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
594 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700595 e = NLMSG_DATA(nlh);
596 e->error = -ETIMEDOUT;
597 memset(&e->msg, 0, sizeof(e->msg));
Thomas Graf2942e902006-08-15 00:30:25 -0700598
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000599 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600 } else
601 kfree_skb(skb);
602 }
603
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000604 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605}
606
607
Patrick McHardye258beb2010-04-13 05:03:19 +0000608/* Timer process for the unresolved queue. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609
Patrick McHardye258beb2010-04-13 05:03:19 +0000610static void ipmr_expire_process(unsigned long arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611{
Patrick McHardy0c122952010-04-13 05:03:22 +0000612 struct mr_table *mrt = (struct mr_table *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613 unsigned long now;
614 unsigned long expires;
Patrick McHardy862465f2010-04-13 05:03:21 +0000615 struct mfc_cache *c, *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616
617 if (!spin_trylock(&mfc_unres_lock)) {
Patrick McHardy0c122952010-04-13 05:03:22 +0000618 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619 return;
620 }
621
Patrick McHardy0c122952010-04-13 05:03:22 +0000622 if (list_empty(&mrt->mfc_unres_queue))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 goto out;
624
625 now = jiffies;
626 expires = 10*HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627
Patrick McHardy0c122952010-04-13 05:03:22 +0000628 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629 if (time_after(c->mfc_un.unres.expires, now)) {
630 unsigned long interval = c->mfc_un.unres.expires - now;
631 if (interval < expires)
632 expires = interval;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 continue;
634 }
635
Patrick McHardy862465f2010-04-13 05:03:21 +0000636 list_del(&c->list);
Patrick McHardy0c122952010-04-13 05:03:22 +0000637 ipmr_destroy_unres(mrt, c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638 }
639
Patrick McHardy0c122952010-04-13 05:03:22 +0000640 if (!list_empty(&mrt->mfc_unres_queue))
641 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642
643out:
644 spin_unlock(&mfc_unres_lock);
645}
646
647/* Fill oifs list. It is called under write locked mrt_lock. */
648
Patrick McHardy0c122952010-04-13 05:03:22 +0000649static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000650 unsigned char *ttls)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651{
652 int vifi;
653
654 cache->mfc_un.res.minvif = MAXVIFS;
655 cache->mfc_un.res.maxvif = 0;
656 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
657
Patrick McHardy0c122952010-04-13 05:03:22 +0000658 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
659 if (VIF_EXISTS(mrt, vifi) &&
Benjamin Therycf958ae32009-01-22 04:56:16 +0000660 ttls[vifi] && ttls[vifi] < 255) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
662 if (cache->mfc_un.res.minvif > vifi)
663 cache->mfc_un.res.minvif = vifi;
664 if (cache->mfc_un.res.maxvif <= vifi)
665 cache->mfc_un.res.maxvif = vifi + 1;
666 }
667 }
668}
669
Patrick McHardy0c122952010-04-13 05:03:22 +0000670static int vif_add(struct net *net, struct mr_table *mrt,
671 struct vifctl *vifc, int mrtsock)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672{
673 int vifi = vifc->vifc_vifi;
Patrick McHardy0c122952010-04-13 05:03:22 +0000674 struct vif_device *v = &mrt->vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675 struct net_device *dev;
676 struct in_device *in_dev;
Wang Chend6070322008-07-14 20:55:26 -0700677 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700678
679 /* Is vif busy ? */
Patrick McHardy0c122952010-04-13 05:03:22 +0000680 if (VIF_EXISTS(mrt, vifi))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681 return -EADDRINUSE;
682
683 switch (vifc->vifc_flags) {
684#ifdef CONFIG_IP_PIMSM
685 case VIFF_REGISTER:
686 /*
687 * Special Purpose VIF in PIM
688 * All the packets will be sent to the daemon
689 */
Patrick McHardy0c122952010-04-13 05:03:22 +0000690 if (mrt->mroute_reg_vif_num >= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691 return -EADDRINUSE;
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000692 dev = ipmr_reg_vif(net, mrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693 if (!dev)
694 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700695 err = dev_set_allmulti(dev, 1);
696 if (err) {
697 unregister_netdevice(dev);
Wang Chen7dc00c82008-07-14 20:56:34 -0700698 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700699 return err;
700 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701 break;
702#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900703 case VIFF_TUNNEL:
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000704 dev = ipmr_new_tunnel(net, vifc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705 if (!dev)
706 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700707 err = dev_set_allmulti(dev, 1);
708 if (err) {
709 ipmr_del_tunnel(dev, vifc);
Wang Chen7dc00c82008-07-14 20:56:34 -0700710 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700711 return err;
712 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713 break;
Ilia Kee5e81f2009-09-16 05:53:07 +0000714
715 case VIFF_USE_IFINDEX:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716 case 0:
Ilia Kee5e81f2009-09-16 05:53:07 +0000717 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
718 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
719 if (dev && dev->ip_ptr == NULL) {
720 dev_put(dev);
721 return -EADDRNOTAVAIL;
722 }
723 } else
724 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
725
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726 if (!dev)
727 return -EADDRNOTAVAIL;
Wang Chend6070322008-07-14 20:55:26 -0700728 err = dev_set_allmulti(dev, 1);
Wang Chen7dc00c82008-07-14 20:56:34 -0700729 if (err) {
730 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700731 return err;
Wang Chen7dc00c82008-07-14 20:56:34 -0700732 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733 break;
734 default:
735 return -EINVAL;
736 }
737
Dan Carpenterd0490cf2009-11-11 02:03:54 +0000738 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
739 dev_put(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740 return -EADDRNOTAVAIL;
Dan Carpenterd0490cf2009-11-11 02:03:54 +0000741 }
Herbert Xu42f811b2007-06-04 23:34:44 -0700742 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743 ip_rt_multicast_event(in_dev);
744
745 /*
746 * Fill in the VIF structures
747 */
Jianjun Kongc354e122008-11-03 00:28:02 -0800748 v->rate_limit = vifc->vifc_rate_limit;
749 v->local = vifc->vifc_lcl_addr.s_addr;
750 v->remote = vifc->vifc_rmt_addr.s_addr;
751 v->flags = vifc->vifc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 if (!mrtsock)
753 v->flags |= VIFF_STATIC;
Jianjun Kongc354e122008-11-03 00:28:02 -0800754 v->threshold = vifc->vifc_threshold;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755 v->bytes_in = 0;
756 v->bytes_out = 0;
757 v->pkt_in = 0;
758 v->pkt_out = 0;
759 v->link = dev->ifindex;
760 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
761 v->link = dev->iflink;
762
763 /* And finish update writing critical data */
764 write_lock_bh(&mrt_lock);
Jianjun Kongc354e122008-11-03 00:28:02 -0800765 v->dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766#ifdef CONFIG_IP_PIMSM
767 if (v->flags&VIFF_REGISTER)
Patrick McHardy0c122952010-04-13 05:03:22 +0000768 mrt->mroute_reg_vif_num = vifi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769#endif
Patrick McHardy0c122952010-04-13 05:03:22 +0000770 if (vifi+1 > mrt->maxvif)
771 mrt->maxvif = vifi+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772 write_unlock_bh(&mrt_lock);
773 return 0;
774}
775
Patrick McHardy0c122952010-04-13 05:03:22 +0000776static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000777 __be32 origin,
778 __be32 mcastgrp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779{
Jianjun Kongc354e122008-11-03 00:28:02 -0800780 int line = MFC_HASH(mcastgrp, origin);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781 struct mfc_cache *c;
782
Patrick McHardy0c122952010-04-13 05:03:22 +0000783 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
Patrick McHardy862465f2010-04-13 05:03:21 +0000784 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
785 return c;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786 }
Patrick McHardy862465f2010-04-13 05:03:21 +0000787 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788}
789
790/*
791 * Allocate a multicast cache entry
792 */
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000793static struct mfc_cache *ipmr_cache_alloc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700794{
Jianjun Kongc354e122008-11-03 00:28:02 -0800795 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
796 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798 c->mfc_un.res.minvif = MAXVIFS;
799 return c;
800}
801
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000802static struct mfc_cache *ipmr_cache_alloc_unres(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803{
Jianjun Kongc354e122008-11-03 00:28:02 -0800804 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
805 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807 skb_queue_head_init(&c->mfc_un.unres.unresolved);
808 c->mfc_un.unres.expires = jiffies + 10*HZ;
809 return c;
810}
811
812/*
813 * A cache entry has gone into a resolved state from queued
814 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900815
Patrick McHardy0c122952010-04-13 05:03:22 +0000816static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
817 struct mfc_cache *uc, struct mfc_cache *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700818{
819 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700820 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700821
822 /*
823 * Play the pending entries through our router
824 */
825
Jianjun Kongc354e122008-11-03 00:28:02 -0800826 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700827 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
829
Patrick McHardy0c122952010-04-13 05:03:22 +0000830 if (ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700831 nlh->nlmsg_len = (skb_tail_pointer(skb) -
832 (u8 *)nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833 } else {
834 nlh->nlmsg_type = NLMSG_ERROR;
835 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
836 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700837 e = NLMSG_DATA(nlh);
838 e->error = -EMSGSIZE;
839 memset(&e->msg, 0, sizeof(e->msg));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840 }
Thomas Graf2942e902006-08-15 00:30:25 -0700841
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000842 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 } else
Patrick McHardy0c122952010-04-13 05:03:22 +0000844 ip_mr_forward(net, mrt, skb, c, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845 }
846}
847
848/*
849 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
850 * expects the following bizarre scheme.
851 *
852 * Called under mrt_lock.
853 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900854
Patrick McHardy0c122952010-04-13 05:03:22 +0000855static int ipmr_cache_report(struct mr_table *mrt,
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000856 struct sk_buff *pkt, vifi_t vifi, int assert)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700857{
858 struct sk_buff *skb;
Arnaldo Carvalho de Meloc9bdd4b2007-03-12 20:09:15 -0300859 const int ihl = ip_hdrlen(pkt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860 struct igmphdr *igmp;
861 struct igmpmsg *msg;
862 int ret;
863
864#ifdef CONFIG_IP_PIMSM
865 if (assert == IGMPMSG_WHOLEPKT)
866 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
867 else
868#endif
869 skb = alloc_skb(128, GFP_ATOMIC);
870
Stephen Hemminger132adf52007-03-08 20:44:43 -0800871 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872 return -ENOBUFS;
873
874#ifdef CONFIG_IP_PIMSM
875 if (assert == IGMPMSG_WHOLEPKT) {
876 /* Ugly, but we have no choice with this interface.
877 Duplicate old header, fix ihl, length etc.
878 And all this only to mangle msg->im_msgtype and
879 to set msg->im_mbz to "mbz" :-)
880 */
Arnaldo Carvalho de Melo878c8142007-03-11 22:38:29 -0300881 skb_push(skb, sizeof(struct iphdr));
882 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -0300883 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melo0272ffc2007-03-12 20:05:39 -0300884 msg = (struct igmpmsg *)skb_network_header(skb);
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -0700885 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886 msg->im_msgtype = IGMPMSG_WHOLEPKT;
887 msg->im_mbz = 0;
Patrick McHardy0c122952010-04-13 05:03:22 +0000888 msg->im_vif = mrt->mroute_reg_vif_num;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700889 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
890 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
891 sizeof(struct iphdr));
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900892 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900894 {
895
Linus Torvalds1da177e2005-04-16 15:20:36 -0700896 /*
897 * Copy the IP header
898 */
899
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700900 skb->network_header = skb->tail;
Arnaldo Carvalho de Meloddc7b8e2007-03-15 21:42:27 -0300901 skb_put(skb, ihl);
Arnaldo Carvalho de Melo27d7ff42007-03-31 11:55:19 -0300902 skb_copy_to_linear_data(skb, pkt->data, ihl);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700903 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
904 msg = (struct igmpmsg *)skb_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 msg->im_vif = vifi;
Eric Dumazetadf30902009-06-02 05:19:30 +0000906 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907
908 /*
909 * Add our header
910 */
911
Jianjun Kongc354e122008-11-03 00:28:02 -0800912 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913 igmp->type =
914 msg->im_msgtype = assert;
915 igmp->code = 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700916 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700917 skb->transport_header = skb->network_header;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900918 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919
Patrick McHardy0c122952010-04-13 05:03:22 +0000920 if (mrt->mroute_sk == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921 kfree_skb(skb);
922 return -EINVAL;
923 }
924
925 /*
926 * Deliver to mrouted
927 */
Patrick McHardy0c122952010-04-13 05:03:22 +0000928 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
Benjamin Thery70a269e2009-01-22 04:56:15 +0000929 if (ret < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930 if (net_ratelimit())
931 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
932 kfree_skb(skb);
933 }
934
935 return ret;
936}
937
938/*
939 * Queue a packet for resolution. It gets locked cache entry!
940 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900941
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942static int
Patrick McHardy0c122952010-04-13 05:03:22 +0000943ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944{
Patrick McHardy862465f2010-04-13 05:03:21 +0000945 bool found = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946 int err;
947 struct mfc_cache *c;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700948 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949
950 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +0000951 list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
Patrick McHardye258beb2010-04-13 05:03:19 +0000952 if (c->mfc_mcastgrp == iph->daddr &&
Patrick McHardy862465f2010-04-13 05:03:21 +0000953 c->mfc_origin == iph->saddr) {
954 found = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955 break;
Patrick McHardy862465f2010-04-13 05:03:21 +0000956 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957 }
958
Patrick McHardy862465f2010-04-13 05:03:21 +0000959 if (!found) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960 /*
961 * Create a new entry if allowable
962 */
963
Patrick McHardy0c122952010-04-13 05:03:22 +0000964 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000965 (c = ipmr_cache_alloc_unres()) == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966 spin_unlock_bh(&mfc_unres_lock);
967
968 kfree_skb(skb);
969 return -ENOBUFS;
970 }
971
972 /*
973 * Fill in the new cache entry
974 */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700975 c->mfc_parent = -1;
976 c->mfc_origin = iph->saddr;
977 c->mfc_mcastgrp = iph->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978
979 /*
980 * Reflect first query at mrouted.
981 */
Patrick McHardy0c122952010-04-13 05:03:22 +0000982 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000983 if (err < 0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900984 /* If the report failed throw the cache entry
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985 out - Brad Parker
986 */
987 spin_unlock_bh(&mfc_unres_lock);
988
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000989 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700990 kfree_skb(skb);
991 return err;
992 }
993
Patrick McHardy0c122952010-04-13 05:03:22 +0000994 atomic_inc(&mrt->cache_resolve_queue_len);
995 list_add(&c->list, &mrt->mfc_unres_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996
Patrick McHardy0c122952010-04-13 05:03:22 +0000997 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998 }
999
1000 /*
1001 * See if we can append the packet
1002 */
1003 if (c->mfc_un.unres.unresolved.qlen>3) {
1004 kfree_skb(skb);
1005 err = -ENOBUFS;
1006 } else {
Jianjun Kongc354e122008-11-03 00:28:02 -08001007 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008 err = 0;
1009 }
1010
1011 spin_unlock_bh(&mfc_unres_lock);
1012 return err;
1013}
1014
1015/*
1016 * MFC cache manipulation by user space mroute daemon
1017 */
1018
Patrick McHardy0c122952010-04-13 05:03:22 +00001019static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020{
1021 int line;
Patrick McHardy862465f2010-04-13 05:03:21 +00001022 struct mfc_cache *c, *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023
Jianjun Kongc354e122008-11-03 00:28:02 -08001024 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001025
Patrick McHardy0c122952010-04-13 05:03:22 +00001026 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1028 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1029 write_lock_bh(&mrt_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00001030 list_del(&c->list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031 write_unlock_bh(&mrt_lock);
1032
Benjamin Thery5c0a66f2009-01-22 04:56:17 +00001033 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034 return 0;
1035 }
1036 }
1037 return -ENOENT;
1038}
1039
Patrick McHardy0c122952010-04-13 05:03:22 +00001040static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1041 struct mfcctl *mfc, int mrtsock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001042{
Patrick McHardy862465f2010-04-13 05:03:21 +00001043 bool found = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044 int line;
Patrick McHardy862465f2010-04-13 05:03:21 +00001045 struct mfc_cache *uc, *c;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046
Patrick McHardya50436f2010-03-17 06:04:14 +00001047 if (mfc->mfcc_parent >= MAXVIFS)
1048 return -ENFILE;
1049
Jianjun Kongc354e122008-11-03 00:28:02 -08001050 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001051
Patrick McHardy0c122952010-04-13 05:03:22 +00001052 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
Patrick McHardy862465f2010-04-13 05:03:21 +00001054 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1055 found = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056 break;
Patrick McHardy862465f2010-04-13 05:03:21 +00001057 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058 }
1059
Patrick McHardy862465f2010-04-13 05:03:21 +00001060 if (found) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061 write_lock_bh(&mrt_lock);
1062 c->mfc_parent = mfc->mfcc_parent;
Patrick McHardy0c122952010-04-13 05:03:22 +00001063 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064 if (!mrtsock)
1065 c->mfc_flags |= MFC_STATIC;
1066 write_unlock_bh(&mrt_lock);
1067 return 0;
1068 }
1069
Joe Perchesf97c1e02007-12-16 13:45:43 -08001070 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071 return -EINVAL;
1072
Patrick McHardyd658f8a2010-04-13 05:03:20 +00001073 c = ipmr_cache_alloc();
Jianjun Kongc354e122008-11-03 00:28:02 -08001074 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075 return -ENOMEM;
1076
Jianjun Kongc354e122008-11-03 00:28:02 -08001077 c->mfc_origin = mfc->mfcc_origin.s_addr;
1078 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
1079 c->mfc_parent = mfc->mfcc_parent;
Patrick McHardy0c122952010-04-13 05:03:22 +00001080 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081 if (!mrtsock)
1082 c->mfc_flags |= MFC_STATIC;
1083
1084 write_lock_bh(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001085 list_add(&c->list, &mrt->mfc_cache_array[line]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001086 write_unlock_bh(&mrt_lock);
1087
1088 /*
1089 * Check to see if we resolved a queued list. If so we
1090 * need to send on the frames and tidy up.
1091 */
1092 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001093 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
Patrick McHardye258beb2010-04-13 05:03:19 +00001094 if (uc->mfc_origin == c->mfc_origin &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
Patrick McHardy862465f2010-04-13 05:03:21 +00001096 list_del(&uc->list);
Patrick McHardy0c122952010-04-13 05:03:22 +00001097 atomic_dec(&mrt->cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098 break;
1099 }
1100 }
Patrick McHardy0c122952010-04-13 05:03:22 +00001101 if (list_empty(&mrt->mfc_unres_queue))
1102 del_timer(&mrt->ipmr_expire_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103 spin_unlock_bh(&mfc_unres_lock);
1104
1105 if (uc) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001106 ipmr_cache_resolve(net, mrt, uc, c);
Benjamin Thery5c0a66f2009-01-22 04:56:17 +00001107 ipmr_cache_free(uc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001108 }
1109 return 0;
1110}
1111
1112/*
1113 * Close the multicast socket, and clear the vif tables etc
1114 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001115
Patrick McHardy0c122952010-04-13 05:03:22 +00001116static void mroute_clean_tables(struct mr_table *mrt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117{
1118 int i;
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001119 LIST_HEAD(list);
Patrick McHardy862465f2010-04-13 05:03:21 +00001120 struct mfc_cache *c, *next;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001121
Linus Torvalds1da177e2005-04-16 15:20:36 -07001122 /*
1123 * Shut down all active vif entries
1124 */
Patrick McHardy0c122952010-04-13 05:03:22 +00001125 for (i = 0; i < mrt->maxvif; i++) {
1126 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
1127 vif_delete(mrt, i, 0, &list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001128 }
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001129 unregister_netdevice_many(&list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130
1131 /*
1132 * Wipe the cache
1133 */
Patrick McHardy862465f2010-04-13 05:03:21 +00001134 for (i = 0; i < MFC_LINES; i++) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001135 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
Patrick McHardy862465f2010-04-13 05:03:21 +00001136 if (c->mfc_flags&MFC_STATIC)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138 write_lock_bh(&mrt_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00001139 list_del(&c->list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140 write_unlock_bh(&mrt_lock);
1141
Benjamin Thery5c0a66f2009-01-22 04:56:17 +00001142 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143 }
1144 }
1145
Patrick McHardy0c122952010-04-13 05:03:22 +00001146 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001148 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
Patrick McHardy862465f2010-04-13 05:03:21 +00001149 list_del(&c->list);
Patrick McHardy0c122952010-04-13 05:03:22 +00001150 ipmr_destroy_unres(mrt, c);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151 }
1152 spin_unlock_bh(&mfc_unres_lock);
1153 }
1154}
1155
1156static void mrtsock_destruct(struct sock *sk)
1157{
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001158 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001159 struct mr_table *mrt;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001160
Linus Torvalds1da177e2005-04-16 15:20:36 -07001161 rtnl_lock();
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001162 ipmr_for_each_table(mrt, net) {
1163 if (sk == mrt->mroute_sk) {
1164 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001166 write_lock_bh(&mrt_lock);
1167 mrt->mroute_sk = NULL;
1168 write_unlock_bh(&mrt_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001169
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001170 mroute_clean_tables(mrt);
1171 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172 }
1173 rtnl_unlock();
1174}
1175
1176/*
1177 * Socket options and virtual interface manipulation. The whole
1178 * virtual interface system is a complete heap, but unfortunately
1179 * that's how BSD mrouted happens to think. Maybe one day with a proper
1180 * MOSPF/PIM router set up we can clean this up.
1181 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001182
David S. Millerb7058842009-09-30 16:12:20 -07001183int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184{
1185 int ret;
1186 struct vifctl vif;
1187 struct mfcctl mfc;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001188 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001189 struct mr_table *mrt;
1190
1191 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1192 if (mrt == NULL)
1193 return -ENOENT;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001194
Stephen Hemminger132adf52007-03-08 20:44:43 -08001195 if (optname != MRT_INIT) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001196 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197 return -EACCES;
1198 }
1199
Stephen Hemminger132adf52007-03-08 20:44:43 -08001200 switch (optname) {
1201 case MRT_INIT:
1202 if (sk->sk_type != SOCK_RAW ||
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001203 inet_sk(sk)->inet_num != IPPROTO_IGMP)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001204 return -EOPNOTSUPP;
Jianjun Kongc354e122008-11-03 00:28:02 -08001205 if (optlen != sizeof(int))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001206 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001207
Stephen Hemminger132adf52007-03-08 20:44:43 -08001208 rtnl_lock();
Patrick McHardy0c122952010-04-13 05:03:22 +00001209 if (mrt->mroute_sk) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001210 rtnl_unlock();
Stephen Hemminger132adf52007-03-08 20:44:43 -08001211 return -EADDRINUSE;
1212 }
1213
1214 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1215 if (ret == 0) {
1216 write_lock_bh(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001217 mrt->mroute_sk = sk;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001218 write_unlock_bh(&mrt_lock);
1219
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001220 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001221 }
1222 rtnl_unlock();
1223 return ret;
1224 case MRT_DONE:
Patrick McHardy0c122952010-04-13 05:03:22 +00001225 if (sk != mrt->mroute_sk)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001226 return -EACCES;
1227 return ip_ra_control(sk, 0, NULL);
1228 case MRT_ADD_VIF:
1229 case MRT_DEL_VIF:
Jianjun Kongc354e122008-11-03 00:28:02 -08001230 if (optlen != sizeof(vif))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001231 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -08001232 if (copy_from_user(&vif, optval, sizeof(vif)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001233 return -EFAULT;
1234 if (vif.vifc_vifi >= MAXVIFS)
1235 return -ENFILE;
1236 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -08001237 if (optname == MRT_ADD_VIF) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001238 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001239 } else {
Patrick McHardy0c122952010-04-13 05:03:22 +00001240 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001241 }
1242 rtnl_unlock();
1243 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001244
1245 /*
1246 * Manipulate the forwarding caches. These live
1247 * in a sort of kernel/user symbiosis.
1248 */
Stephen Hemminger132adf52007-03-08 20:44:43 -08001249 case MRT_ADD_MFC:
1250 case MRT_DEL_MFC:
Jianjun Kongc354e122008-11-03 00:28:02 -08001251 if (optlen != sizeof(mfc))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001252 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -08001253 if (copy_from_user(&mfc, optval, sizeof(mfc)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001254 return -EFAULT;
1255 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -08001256 if (optname == MRT_DEL_MFC)
Patrick McHardy0c122952010-04-13 05:03:22 +00001257 ret = ipmr_mfc_delete(mrt, &mfc);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001258 else
Patrick McHardy0c122952010-04-13 05:03:22 +00001259 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001260 rtnl_unlock();
1261 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262 /*
1263 * Control PIM assert.
1264 */
Stephen Hemminger132adf52007-03-08 20:44:43 -08001265 case MRT_ASSERT:
1266 {
1267 int v;
1268 if (get_user(v,(int __user *)optval))
1269 return -EFAULT;
Patrick McHardy0c122952010-04-13 05:03:22 +00001270 mrt->mroute_do_assert = (v) ? 1 : 0;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001271 return 0;
1272 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273#ifdef CONFIG_IP_PIMSM
Stephen Hemminger132adf52007-03-08 20:44:43 -08001274 case MRT_PIM:
1275 {
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001276 int v;
1277
Stephen Hemminger132adf52007-03-08 20:44:43 -08001278 if (get_user(v,(int __user *)optval))
1279 return -EFAULT;
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001280 v = (v) ? 1 : 0;
1281
Stephen Hemminger132adf52007-03-08 20:44:43 -08001282 rtnl_lock();
1283 ret = 0;
Patrick McHardy0c122952010-04-13 05:03:22 +00001284 if (v != mrt->mroute_do_pim) {
1285 mrt->mroute_do_pim = v;
1286 mrt->mroute_do_assert = v;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001287 }
Stephen Hemminger132adf52007-03-08 20:44:43 -08001288 rtnl_unlock();
1289 return ret;
1290 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291#endif
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001292#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1293 case MRT_TABLE:
1294 {
1295 u32 v;
1296
1297 if (optlen != sizeof(u32))
1298 return -EINVAL;
1299 if (get_user(v, (u32 __user *)optval))
1300 return -EFAULT;
1301 if (sk == mrt->mroute_sk)
1302 return -EBUSY;
1303
1304 rtnl_lock();
1305 ret = 0;
1306 if (!ipmr_new_table(net, v))
1307 ret = -ENOMEM;
1308 raw_sk(sk)->ipmr_table = v;
1309 rtnl_unlock();
1310 return ret;
1311 }
1312#endif
Stephen Hemminger132adf52007-03-08 20:44:43 -08001313 /*
1314 * Spurious command, or MRT_VERSION which you cannot
1315 * set.
1316 */
1317 default:
1318 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001319 }
1320}
1321
1322/*
1323 * Getsock opt support for the multicast routing system.
1324 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001325
Jianjun Kongc354e122008-11-03 00:28:02 -08001326int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001327{
1328 int olr;
1329 int val;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001330 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001331 struct mr_table *mrt;
1332
1333 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1334 if (mrt == NULL)
1335 return -ENOENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336
Jianjun Kongc354e122008-11-03 00:28:02 -08001337 if (optname != MRT_VERSION &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338#ifdef CONFIG_IP_PIMSM
1339 optname!=MRT_PIM &&
1340#endif
1341 optname!=MRT_ASSERT)
1342 return -ENOPROTOOPT;
1343
1344 if (get_user(olr, optlen))
1345 return -EFAULT;
1346
1347 olr = min_t(unsigned int, olr, sizeof(int));
1348 if (olr < 0)
1349 return -EINVAL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001350
Jianjun Kongc354e122008-11-03 00:28:02 -08001351 if (put_user(olr, optlen))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352 return -EFAULT;
Jianjun Kongc354e122008-11-03 00:28:02 -08001353 if (optname == MRT_VERSION)
1354 val = 0x0305;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355#ifdef CONFIG_IP_PIMSM
Jianjun Kongc354e122008-11-03 00:28:02 -08001356 else if (optname == MRT_PIM)
Patrick McHardy0c122952010-04-13 05:03:22 +00001357 val = mrt->mroute_do_pim;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358#endif
1359 else
Patrick McHardy0c122952010-04-13 05:03:22 +00001360 val = mrt->mroute_do_assert;
Jianjun Kongc354e122008-11-03 00:28:02 -08001361 if (copy_to_user(optval, &val, olr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001362 return -EFAULT;
1363 return 0;
1364}
1365
1366/*
1367 * The IP multicast ioctl support routines.
1368 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001369
Linus Torvalds1da177e2005-04-16 15:20:36 -07001370int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1371{
1372 struct sioc_sg_req sr;
1373 struct sioc_vif_req vr;
1374 struct vif_device *vif;
1375 struct mfc_cache *c;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001376 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001377 struct mr_table *mrt;
1378
1379 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1380 if (mrt == NULL)
1381 return -ENOENT;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001382
Stephen Hemminger132adf52007-03-08 20:44:43 -08001383 switch (cmd) {
1384 case SIOCGETVIFCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001385 if (copy_from_user(&vr, arg, sizeof(vr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001386 return -EFAULT;
Patrick McHardy0c122952010-04-13 05:03:22 +00001387 if (vr.vifi >= mrt->maxvif)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001388 return -EINVAL;
1389 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001390 vif = &mrt->vif_table[vr.vifi];
1391 if (VIF_EXISTS(mrt, vr.vifi)) {
Jianjun Kongc354e122008-11-03 00:28:02 -08001392 vr.icount = vif->pkt_in;
1393 vr.ocount = vif->pkt_out;
1394 vr.ibytes = vif->bytes_in;
1395 vr.obytes = vif->bytes_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001397
Jianjun Kongc354e122008-11-03 00:28:02 -08001398 if (copy_to_user(arg, &vr, sizeof(vr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399 return -EFAULT;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001400 return 0;
1401 }
1402 read_unlock(&mrt_lock);
1403 return -EADDRNOTAVAIL;
1404 case SIOCGETSGCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001405 if (copy_from_user(&sr, arg, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001406 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407
Stephen Hemminger132adf52007-03-08 20:44:43 -08001408 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001409 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001410 if (c) {
1411 sr.pktcnt = c->mfc_un.res.pkt;
1412 sr.bytecnt = c->mfc_un.res.bytes;
1413 sr.wrong_if = c->mfc_un.res.wrong_if;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001415
Jianjun Kongc354e122008-11-03 00:28:02 -08001416 if (copy_to_user(arg, &sr, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001417 return -EFAULT;
1418 return 0;
1419 }
1420 read_unlock(&mrt_lock);
1421 return -EADDRNOTAVAIL;
1422 default:
1423 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001424 }
1425}
1426
1427
1428static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1429{
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001430 struct net_device *dev = ptr;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001431 struct net *net = dev_net(dev);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001432 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001433 struct vif_device *v;
1434 int ct;
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001435 LIST_HEAD(list);
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001436
Linus Torvalds1da177e2005-04-16 15:20:36 -07001437 if (event != NETDEV_UNREGISTER)
1438 return NOTIFY_DONE;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001439
1440 ipmr_for_each_table(mrt, net) {
1441 v = &mrt->vif_table[0];
1442 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1443 if (v->dev == dev)
1444 vif_delete(mrt, ct, 1, &list);
1445 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001446 }
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001447 unregister_netdevice_many(&list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001448 return NOTIFY_DONE;
1449}
1450
1451
Jianjun Kongc354e122008-11-03 00:28:02 -08001452static struct notifier_block ip_mr_notifier = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453 .notifier_call = ipmr_device_event,
1454};
1455
1456/*
1457 * Encapsulate a packet by attaching a valid IPIP header to it.
1458 * This avoids tunnel drivers and other mess and gives us the speed so
1459 * important for multicast video.
1460 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001461
Al Viro114c7842006-09-27 18:39:29 -07001462static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001463{
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001464 struct iphdr *iph;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001465 struct iphdr *old_iph = ip_hdr(skb);
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001466
1467 skb_push(skb, sizeof(struct iphdr));
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001468 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001469 skb_reset_network_header(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001470 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471
1472 iph->version = 4;
Arnaldo Carvalho de Meloe023dd62007-03-12 20:09:36 -03001473 iph->tos = old_iph->tos;
1474 iph->ttl = old_iph->ttl;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001475 iph->frag_off = 0;
1476 iph->daddr = daddr;
1477 iph->saddr = saddr;
1478 iph->protocol = IPPROTO_IPIP;
1479 iph->ihl = 5;
1480 iph->tot_len = htons(skb->len);
Eric Dumazetadf30902009-06-02 05:19:30 +00001481 ip_select_ident(iph, skb_dst(skb), NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001482 ip_send_check(iph);
1483
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1485 nf_reset(skb);
1486}
1487
1488static inline int ipmr_forward_finish(struct sk_buff *skb)
1489{
1490 struct ip_options * opt = &(IPCB(skb)->opt);
1491
Eric Dumazetadf30902009-06-02 05:19:30 +00001492 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001493
1494 if (unlikely(opt->optlen))
1495 ip_forward_options(skb);
1496
1497 return dst_output(skb);
1498}
1499
1500/*
1501 * Processing handlers for ipmr_forward
1502 */
1503
Patrick McHardy0c122952010-04-13 05:03:22 +00001504static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1505 struct sk_buff *skb, struct mfc_cache *c, int vifi)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001507 const struct iphdr *iph = ip_hdr(skb);
Patrick McHardy0c122952010-04-13 05:03:22 +00001508 struct vif_device *vif = &mrt->vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001509 struct net_device *dev;
1510 struct rtable *rt;
1511 int encap = 0;
1512
1513 if (vif->dev == NULL)
1514 goto out_free;
1515
1516#ifdef CONFIG_IP_PIMSM
1517 if (vif->flags & VIFF_REGISTER) {
1518 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001519 vif->bytes_out += skb->len;
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001520 vif->dev->stats.tx_bytes += skb->len;
1521 vif->dev->stats.tx_packets++;
Patrick McHardy0c122952010-04-13 05:03:22 +00001522 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
Ilpo Järvinen69ebbf52009-02-06 23:46:51 -08001523 goto out_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001524 }
1525#endif
1526
1527 if (vif->flags&VIFF_TUNNEL) {
1528 struct flowi fl = { .oif = vif->link,
1529 .nl_u = { .ip4_u =
1530 { .daddr = vif->remote,
1531 .saddr = vif->local,
1532 .tos = RT_TOS(iph->tos) } },
1533 .proto = IPPROTO_IPIP };
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001534 if (ip_route_output_key(net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001535 goto out_free;
1536 encap = sizeof(struct iphdr);
1537 } else {
1538 struct flowi fl = { .oif = vif->link,
1539 .nl_u = { .ip4_u =
1540 { .daddr = iph->daddr,
1541 .tos = RT_TOS(iph->tos) } },
1542 .proto = IPPROTO_IPIP };
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001543 if (ip_route_output_key(net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001544 goto out_free;
1545 }
1546
1547 dev = rt->u.dst.dev;
1548
1549 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1550 /* Do not fragment multicasts. Alas, IPv4 does not
1551 allow to send ICMP, so that packets will disappear
1552 to blackhole.
1553 */
1554
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -07001555 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001556 ip_rt_put(rt);
1557 goto out_free;
1558 }
1559
1560 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1561
1562 if (skb_cow(skb, encap)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001563 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001564 goto out_free;
1565 }
1566
1567 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001568 vif->bytes_out += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001569
Eric Dumazetadf30902009-06-02 05:19:30 +00001570 skb_dst_drop(skb);
1571 skb_dst_set(skb, &rt->u.dst);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001572 ip_decrease_ttl(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573
1574 /* FIXME: forward and output firewalls used to be called here.
1575 * What do we do with netfilter? -- RR */
1576 if (vif->flags & VIFF_TUNNEL) {
1577 ip_encap(skb, vif->local, vif->remote);
1578 /* FIXME: extra output firewall step used to be here. --RR */
Pavel Emelyanov2f4c02d2008-05-21 14:16:14 -07001579 vif->dev->stats.tx_packets++;
1580 vif->dev->stats.tx_bytes += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581 }
1582
1583 IPCB(skb)->flags |= IPSKB_FORWARDED;
1584
1585 /*
1586 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1587 * not only before forwarding, but after forwarding on all output
1588 * interfaces. It is clear, if mrouter runs a multicasting
1589 * program, it should receive packets not depending to what interface
1590 * program is joined.
1591 * If we will not make it, the program will have to join on all
1592 * interfaces. On the other hand, multihoming host (or router, but
1593 * not mrouter) cannot join to more than one interface - it will
1594 * result in receiving multiple packets.
1595 */
Patrick McHardy6e23ae22007-11-19 18:53:30 -08001596 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001597 ipmr_forward_finish);
1598 return;
1599
1600out_free:
1601 kfree_skb(skb);
1602 return;
1603}
1604
Patrick McHardy0c122952010-04-13 05:03:22 +00001605static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606{
1607 int ct;
Patrick McHardy0c122952010-04-13 05:03:22 +00001608
1609 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1610 if (mrt->vif_table[ct].dev == dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001611 break;
1612 }
1613 return ct;
1614}
1615
1616/* "local" means that we should preserve one skb (for local delivery) */
1617
Patrick McHardy0c122952010-04-13 05:03:22 +00001618static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1619 struct sk_buff *skb, struct mfc_cache *cache,
1620 int local)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621{
1622 int psend = -1;
1623 int vif, ct;
1624
1625 vif = cache->mfc_parent;
1626 cache->mfc_un.res.pkt++;
1627 cache->mfc_un.res.bytes += skb->len;
1628
1629 /*
1630 * Wrong interface: drop packet and (maybe) send PIM assert.
1631 */
Patrick McHardy0c122952010-04-13 05:03:22 +00001632 if (mrt->vif_table[vif].dev != skb->dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633 int true_vifi;
1634
Eric Dumazet511c3f92009-06-02 05:14:27 +00001635 if (skb_rtable(skb)->fl.iif == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636 /* It is our own packet, looped back.
1637 Very complicated situation...
1638
1639 The best workaround until routing daemons will be
1640 fixed is not to redistribute packet, if it was
1641 send through wrong interface. It means, that
1642 multicast applications WILL NOT work for
1643 (S,G), which have default multicast route pointing
1644 to wrong oif. In any case, it is not a good
1645 idea to use multicasting applications on router.
1646 */
1647 goto dont_forward;
1648 }
1649
1650 cache->mfc_un.res.wrong_if++;
Patrick McHardy0c122952010-04-13 05:03:22 +00001651 true_vifi = ipmr_find_vif(mrt, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001652
Patrick McHardy0c122952010-04-13 05:03:22 +00001653 if (true_vifi >= 0 && mrt->mroute_do_assert &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654 /* pimsm uses asserts, when switching from RPT to SPT,
1655 so that we cannot check that packet arrived on an oif.
1656 It is bad, but otherwise we would need to move pretty
1657 large chunk of pimd to kernel. Ough... --ANK
1658 */
Patrick McHardy0c122952010-04-13 05:03:22 +00001659 (mrt->mroute_do_pim ||
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001660 cache->mfc_un.res.ttls[true_vifi] < 255) &&
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001661 time_after(jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001662 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1663 cache->mfc_un.res.last_assert = jiffies;
Patrick McHardy0c122952010-04-13 05:03:22 +00001664 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001665 }
1666 goto dont_forward;
1667 }
1668
Patrick McHardy0c122952010-04-13 05:03:22 +00001669 mrt->vif_table[vif].pkt_in++;
1670 mrt->vif_table[vif].bytes_in += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671
1672 /*
1673 * Forward the frame
1674 */
1675 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001676 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001677 if (psend != -1) {
1678 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1679 if (skb2)
Patrick McHardy0c122952010-04-13 05:03:22 +00001680 ipmr_queue_xmit(net, mrt, skb2, cache,
1681 psend);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682 }
Jianjun Kongc354e122008-11-03 00:28:02 -08001683 psend = ct;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 }
1685 }
1686 if (psend != -1) {
1687 if (local) {
1688 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1689 if (skb2)
Patrick McHardy0c122952010-04-13 05:03:22 +00001690 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691 } else {
Patrick McHardy0c122952010-04-13 05:03:22 +00001692 ipmr_queue_xmit(net, mrt, skb, cache, psend);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001693 return 0;
1694 }
1695 }
1696
1697dont_forward:
1698 if (!local)
1699 kfree_skb(skb);
1700 return 0;
1701}
1702
1703
1704/*
1705 * Multicast packets for forwarding arrive here
1706 */
1707
1708int ip_mr_input(struct sk_buff *skb)
1709{
1710 struct mfc_cache *cache;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001711 struct net *net = dev_net(skb->dev);
Eric Dumazet511c3f92009-06-02 05:14:27 +00001712 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001713 struct mr_table *mrt;
1714 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001715
1716 /* Packet is looped back after forward, it should not be
1717 forwarded second time, but still can be delivered locally.
1718 */
1719 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1720 goto dont_forward;
1721
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001722 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
1723 if (err < 0)
1724 return err;
1725
Linus Torvalds1da177e2005-04-16 15:20:36 -07001726 if (!local) {
1727 if (IPCB(skb)->opt.router_alert) {
1728 if (ip_call_ra_chain(skb))
1729 return 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001730 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731 /* IGMPv1 (and broken IGMPv2 implementations sort of
1732 Cisco IOS <= 11.2(8)) do not put router alert
1733 option to IGMP packets destined to routable
1734 groups. It is very bad, because it means
1735 that we can forward NO IGMP messages.
1736 */
1737 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001738 if (mrt->mroute_sk) {
Patrick McHardy2715bcf2005-06-21 14:06:24 -07001739 nf_reset(skb);
Patrick McHardy0c122952010-04-13 05:03:22 +00001740 raw_rcv(mrt->mroute_sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741 read_unlock(&mrt_lock);
1742 return 0;
1743 }
1744 read_unlock(&mrt_lock);
1745 }
1746 }
1747
1748 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001749 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750
1751 /*
1752 * No usable cache entry
1753 */
Jianjun Kongc354e122008-11-03 00:28:02 -08001754 if (cache == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755 int vif;
1756
1757 if (local) {
1758 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1759 ip_local_deliver(skb);
1760 if (skb2 == NULL) {
1761 read_unlock(&mrt_lock);
1762 return -ENOBUFS;
1763 }
1764 skb = skb2;
1765 }
1766
Patrick McHardy0c122952010-04-13 05:03:22 +00001767 vif = ipmr_find_vif(mrt, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768 if (vif >= 0) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001769 int err = ipmr_cache_unresolved(mrt, vif, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001770 read_unlock(&mrt_lock);
1771
1772 return err;
1773 }
1774 read_unlock(&mrt_lock);
1775 kfree_skb(skb);
1776 return -ENODEV;
1777 }
1778
Patrick McHardy0c122952010-04-13 05:03:22 +00001779 ip_mr_forward(net, mrt, skb, cache, local);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780
1781 read_unlock(&mrt_lock);
1782
1783 if (local)
1784 return ip_local_deliver(skb);
1785
1786 return 0;
1787
1788dont_forward:
1789 if (local)
1790 return ip_local_deliver(skb);
1791 kfree_skb(skb);
1792 return 0;
1793}
1794
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001795#ifdef CONFIG_IP_PIMSM
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001796static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1797 unsigned int pimlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798{
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001799 struct net_device *reg_dev = NULL;
1800 struct iphdr *encap;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001802 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001803 /*
1804 Check that:
1805 a. packet is really destinted to a multicast group
1806 b. packet is not a NULL-REGISTER
1807 c. packet is not truncated
1808 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001809 if (!ipv4_is_multicast(encap->daddr) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001810 encap->tot_len == 0 ||
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001811 ntohs(encap->tot_len) + pimlen > skb->len)
1812 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813
1814 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001815 if (mrt->mroute_reg_vif_num >= 0)
1816 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001817 if (reg_dev)
1818 dev_hold(reg_dev);
1819 read_unlock(&mrt_lock);
1820
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001821 if (reg_dev == NULL)
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001822 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001824 skb->mac_header = skb->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825 skb_pull(skb, (u8*)encap - skb->data);
Arnaldo Carvalho de Melo31c77112007-03-10 19:04:55 -03001826 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827 skb->dev = reg_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828 skb->protocol = htons(ETH_P_IP);
1829 skb->ip_summed = 0;
1830 skb->pkt_type = PACKET_HOST;
Eric Dumazetadf30902009-06-02 05:19:30 +00001831 skb_dst_drop(skb);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001832 reg_dev->stats.rx_bytes += skb->len;
1833 reg_dev->stats.rx_packets++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001834 nf_reset(skb);
1835 netif_rx(skb);
1836 dev_put(reg_dev);
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001837
Linus Torvalds1da177e2005-04-16 15:20:36 -07001838 return 0;
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001839}
1840#endif
1841
1842#ifdef CONFIG_IP_PIMSM_V1
1843/*
1844 * Handle IGMP messages of PIMv1
1845 */
1846
1847int pim_rcv_v1(struct sk_buff * skb)
1848{
1849 struct igmphdr *pim;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001850 struct net *net = dev_net(skb->dev);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001851 struct mr_table *mrt;
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001852
1853 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1854 goto drop;
1855
1856 pim = igmp_hdr(skb);
1857
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001858 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1859 goto drop;
1860
Patrick McHardy0c122952010-04-13 05:03:22 +00001861 if (!mrt->mroute_do_pim ||
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001862 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1863 goto drop;
1864
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001865 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001866drop:
1867 kfree_skb(skb);
1868 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869 return 0;
1870}
1871#endif
1872
1873#ifdef CONFIG_IP_PIMSM_V2
1874static int pim_rcv(struct sk_buff * skb)
1875{
1876 struct pimreghdr *pim;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001877 struct net *net = dev_net(skb->dev);
1878 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001880 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001881 goto drop;
1882
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -07001883 pim = (struct pimreghdr *)skb_transport_header(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001884 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885 (pim->flags&PIM_NULL_REGISTER) ||
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001886 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
Al Virod3bc23e2006-11-14 21:24:49 -08001887 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001888 goto drop;
1889
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001890 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1891 goto drop;
1892
1893 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001894drop:
1895 kfree_skb(skb);
1896 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001897 return 0;
1898}
1899#endif
1900
1901static int
Patrick McHardy0c122952010-04-13 05:03:22 +00001902ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c,
Patrick McHardyd658f8a2010-04-13 05:03:20 +00001903 struct rtmsg *rtm)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904{
1905 int ct;
1906 struct rtnexthop *nhp;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001907 u8 *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001908 struct rtattr *mp_head;
1909
Nicolas Dichtel74381892010-03-25 23:45:35 +00001910 /* If cache is unresolved, don't try to parse IIF and OIF */
1911 if (c->mfc_parent > MAXVIFS)
1912 return -ENOENT;
1913
Patrick McHardy0c122952010-04-13 05:03:22 +00001914 if (VIF_EXISTS(mrt, c->mfc_parent))
1915 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001916
Jianjun Kongc354e122008-11-03 00:28:02 -08001917 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918
1919 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001920 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001921 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1922 goto rtattr_failure;
Jianjun Kongc354e122008-11-03 00:28:02 -08001923 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001924 nhp->rtnh_flags = 0;
1925 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
Patrick McHardy0c122952010-04-13 05:03:22 +00001926 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927 nhp->rtnh_len = sizeof(*nhp);
1928 }
1929 }
1930 mp_head->rta_type = RTA_MULTIPATH;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001931 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001932 rtm->rtm_type = RTN_MULTICAST;
1933 return 1;
1934
1935rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001936 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001937 return -EMSGSIZE;
1938}
1939
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001940int ipmr_get_route(struct net *net,
1941 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001942{
1943 int err;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001944 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945 struct mfc_cache *cache;
Eric Dumazet511c3f92009-06-02 05:14:27 +00001946 struct rtable *rt = skb_rtable(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001947
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001948 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1949 if (mrt == NULL)
1950 return -ENOENT;
1951
Linus Torvalds1da177e2005-04-16 15:20:36 -07001952 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001953 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954
Jianjun Kongc354e122008-11-03 00:28:02 -08001955 if (cache == NULL) {
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001956 struct sk_buff *skb2;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001957 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001958 struct net_device *dev;
1959 int vif;
1960
1961 if (nowait) {
1962 read_unlock(&mrt_lock);
1963 return -EAGAIN;
1964 }
1965
1966 dev = skb->dev;
Patrick McHardy0c122952010-04-13 05:03:22 +00001967 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001968 read_unlock(&mrt_lock);
1969 return -ENODEV;
1970 }
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001971 skb2 = skb_clone(skb, GFP_ATOMIC);
1972 if (!skb2) {
1973 read_unlock(&mrt_lock);
1974 return -ENOMEM;
1975 }
1976
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -07001977 skb_push(skb2, sizeof(struct iphdr));
1978 skb_reset_network_header(skb2);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001979 iph = ip_hdr(skb2);
1980 iph->ihl = sizeof(struct iphdr) >> 2;
1981 iph->saddr = rt->rt_src;
1982 iph->daddr = rt->rt_dst;
1983 iph->version = 0;
Patrick McHardy0c122952010-04-13 05:03:22 +00001984 err = ipmr_cache_unresolved(mrt, vif, skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985 read_unlock(&mrt_lock);
1986 return err;
1987 }
1988
1989 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1990 cache->mfc_flags |= MFC_NOTIFY;
Patrick McHardy0c122952010-04-13 05:03:22 +00001991 err = ipmr_fill_mroute(mrt, skb, cache, rtm);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001992 read_unlock(&mrt_lock);
1993 return err;
1994}
1995
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001996#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997/*
1998 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1999 */
2000struct ipmr_vif_iter {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002001 struct seq_net_private p;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002002 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002003 int ct;
2004};
2005
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002006static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2007 struct ipmr_vif_iter *iter,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002008 loff_t pos)
2009{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002010 struct mr_table *mrt = iter->mrt;
Patrick McHardy0c122952010-04-13 05:03:22 +00002011
2012 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2013 if (!VIF_EXISTS(mrt, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014 continue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002015 if (pos-- == 0)
Patrick McHardy0c122952010-04-13 05:03:22 +00002016 return &mrt->vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002017 }
2018 return NULL;
2019}
2020
2021static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08002022 __acquires(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002023{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002024 struct ipmr_vif_iter *iter = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002025 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002026 struct mr_table *mrt;
2027
2028 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2029 if (mrt == NULL)
2030 return ERR_PTR(-ENOENT);
2031
2032 iter->mrt = mrt;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002033
Linus Torvalds1da177e2005-04-16 15:20:36 -07002034 read_lock(&mrt_lock);
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002035 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002036 : SEQ_START_TOKEN;
2037}
2038
2039static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2040{
2041 struct ipmr_vif_iter *iter = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002042 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002043 struct mr_table *mrt = iter->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002044
2045 ++*pos;
2046 if (v == SEQ_START_TOKEN)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002047 return ipmr_vif_seq_idx(net, iter, 0);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002048
Patrick McHardy0c122952010-04-13 05:03:22 +00002049 while (++iter->ct < mrt->maxvif) {
2050 if (!VIF_EXISTS(mrt, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051 continue;
Patrick McHardy0c122952010-04-13 05:03:22 +00002052 return &mrt->vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053 }
2054 return NULL;
2055}
2056
2057static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08002058 __releases(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002059{
2060 read_unlock(&mrt_lock);
2061}
2062
2063static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
2064{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002065 struct ipmr_vif_iter *iter = seq->private;
2066 struct mr_table *mrt = iter->mrt;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002067
Linus Torvalds1da177e2005-04-16 15:20:36 -07002068 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002069 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002070 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
2071 } else {
2072 const struct vif_device *vif = v;
2073 const char *name = vif->dev ? vif->dev->name : "none";
2074
2075 seq_printf(seq,
2076 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
Patrick McHardy0c122952010-04-13 05:03:22 +00002077 vif - mrt->vif_table,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002078 name, vif->bytes_in, vif->pkt_in,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079 vif->bytes_out, vif->pkt_out,
2080 vif->flags, vif->local, vif->remote);
2081 }
2082 return 0;
2083}
2084
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002085static const struct seq_operations ipmr_vif_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002086 .start = ipmr_vif_seq_start,
2087 .next = ipmr_vif_seq_next,
2088 .stop = ipmr_vif_seq_stop,
2089 .show = ipmr_vif_seq_show,
2090};
2091
2092static int ipmr_vif_open(struct inode *inode, struct file *file)
2093{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002094 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
2095 sizeof(struct ipmr_vif_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002096}
2097
Arjan van de Ven9a321442007-02-12 00:55:35 -08002098static const struct file_operations ipmr_vif_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002099 .owner = THIS_MODULE,
2100 .open = ipmr_vif_open,
2101 .read = seq_read,
2102 .llseek = seq_lseek,
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002103 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104};
2105
2106struct ipmr_mfc_iter {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002107 struct seq_net_private p;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002108 struct mr_table *mrt;
Patrick McHardy862465f2010-04-13 05:03:21 +00002109 struct list_head *cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002110 int ct;
2111};
2112
2113
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002114static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2115 struct ipmr_mfc_iter *it, loff_t pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002116{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002117 struct mr_table *mrt = it->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002118 struct mfc_cache *mfc;
2119
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120 read_lock(&mrt_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00002121 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
Patrick McHardy0c122952010-04-13 05:03:22 +00002122 it->cache = &mrt->mfc_cache_array[it->ct];
Patrick McHardy862465f2010-04-13 05:03:21 +00002123 list_for_each_entry(mfc, it->cache, list)
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002124 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002125 return mfc;
Patrick McHardy862465f2010-04-13 05:03:21 +00002126 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127 read_unlock(&mrt_lock);
2128
Linus Torvalds1da177e2005-04-16 15:20:36 -07002129 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00002130 it->cache = &mrt->mfc_unres_queue;
Patrick McHardy862465f2010-04-13 05:03:21 +00002131 list_for_each_entry(mfc, it->cache, list)
Patrick McHardye258beb2010-04-13 05:03:19 +00002132 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002133 return mfc;
2134 spin_unlock_bh(&mfc_unres_lock);
2135
2136 it->cache = NULL;
2137 return NULL;
2138}
2139
2140
2141static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
2142{
2143 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002144 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002145 struct mr_table *mrt;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002146
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002147 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2148 if (mrt == NULL)
2149 return ERR_PTR(-ENOENT);
2150
2151 it->mrt = mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002152 it->cache = NULL;
2153 it->ct = 0;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002154 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155 : SEQ_START_TOKEN;
2156}
2157
2158static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2159{
2160 struct mfc_cache *mfc = v;
2161 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002162 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002163 struct mr_table *mrt = it->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002164
2165 ++*pos;
2166
2167 if (v == SEQ_START_TOKEN)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002168 return ipmr_mfc_seq_idx(net, seq->private, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002169
Patrick McHardy862465f2010-04-13 05:03:21 +00002170 if (mfc->list.next != it->cache)
2171 return list_entry(mfc->list.next, struct mfc_cache, list);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002172
Patrick McHardy0c122952010-04-13 05:03:22 +00002173 if (it->cache == &mrt->mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002174 goto end_of_list;
2175
Patrick McHardy0c122952010-04-13 05:03:22 +00002176 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002177
2178 while (++it->ct < MFC_LINES) {
Patrick McHardy0c122952010-04-13 05:03:22 +00002179 it->cache = &mrt->mfc_cache_array[it->ct];
Patrick McHardy862465f2010-04-13 05:03:21 +00002180 if (list_empty(it->cache))
2181 continue;
2182 return list_first_entry(it->cache, struct mfc_cache, list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002183 }
2184
2185 /* exhausted cache_array, show unresolved */
2186 read_unlock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00002187 it->cache = &mrt->mfc_unres_queue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002188 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002189
Linus Torvalds1da177e2005-04-16 15:20:36 -07002190 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00002191 if (!list_empty(it->cache))
2192 return list_first_entry(it->cache, struct mfc_cache, list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002193
2194 end_of_list:
2195 spin_unlock_bh(&mfc_unres_lock);
2196 it->cache = NULL;
2197
2198 return NULL;
2199}
2200
2201static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2202{
2203 struct ipmr_mfc_iter *it = seq->private;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002204 struct mr_table *mrt = it->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002205
Patrick McHardy0c122952010-04-13 05:03:22 +00002206 if (it->cache == &mrt->mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002207 spin_unlock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00002208 else if (it->cache == &mrt->mfc_cache_array[it->ct])
Linus Torvalds1da177e2005-04-16 15:20:36 -07002209 read_unlock(&mrt_lock);
2210}
2211
2212static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2213{
2214 int n;
2215
2216 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002217 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
2219 } else {
2220 const struct mfc_cache *mfc = v;
2221 const struct ipmr_mfc_iter *it = seq->private;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002222 const struct mr_table *mrt = it->mrt;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002223
Benjamin Thery999890b2008-12-03 22:22:16 -08002224 seq_printf(seq, "%08lX %08lX %-3hd",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002225 (unsigned long) mfc->mfc_mcastgrp,
2226 (unsigned long) mfc->mfc_origin,
Benjamin Thery1ea472e2008-12-03 22:21:47 -08002227 mfc->mfc_parent);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228
Patrick McHardy0c122952010-04-13 05:03:22 +00002229 if (it->cache != &mrt->mfc_unres_queue) {
Benjamin Thery1ea472e2008-12-03 22:21:47 -08002230 seq_printf(seq, " %8lu %8lu %8lu",
2231 mfc->mfc_un.res.pkt,
2232 mfc->mfc_un.res.bytes,
2233 mfc->mfc_un.res.wrong_if);
Stephen Hemminger132adf52007-03-08 20:44:43 -08002234 for (n = mfc->mfc_un.res.minvif;
2235 n < mfc->mfc_un.res.maxvif; n++ ) {
Patrick McHardy0c122952010-04-13 05:03:22 +00002236 if (VIF_EXISTS(mrt, n) &&
Benjamin Therycf958ae32009-01-22 04:56:16 +00002237 mfc->mfc_un.res.ttls[n] < 255)
2238 seq_printf(seq,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002239 " %2d:%-3d",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002240 n, mfc->mfc_un.res.ttls[n]);
2241 }
Benjamin Thery1ea472e2008-12-03 22:21:47 -08002242 } else {
2243 /* unresolved mfc_caches don't contain
2244 * pkt, bytes and wrong_if values
2245 */
2246 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002247 }
2248 seq_putc(seq, '\n');
2249 }
2250 return 0;
2251}
2252
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002253static const struct seq_operations ipmr_mfc_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254 .start = ipmr_mfc_seq_start,
2255 .next = ipmr_mfc_seq_next,
2256 .stop = ipmr_mfc_seq_stop,
2257 .show = ipmr_mfc_seq_show,
2258};
2259
2260static int ipmr_mfc_open(struct inode *inode, struct file *file)
2261{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002262 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
2263 sizeof(struct ipmr_mfc_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002264}
2265
Arjan van de Ven9a321442007-02-12 00:55:35 -08002266static const struct file_operations ipmr_mfc_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002267 .owner = THIS_MODULE,
2268 .open = ipmr_mfc_open,
2269 .read = seq_read,
2270 .llseek = seq_lseek,
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002271 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002272};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002273#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274
2275#ifdef CONFIG_IP_PIMSM_V2
Alexey Dobriyan32613092009-09-14 12:21:47 +00002276static const struct net_protocol pim_protocol = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002277 .handler = pim_rcv,
Tom Goff403dbb92009-06-14 03:16:13 -07002278 .netns_ok = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002279};
2280#endif
2281
2282
2283/*
2284 * Setup for IP multicast routing
2285 */
Benjamin Therycf958ae32009-01-22 04:56:16 +00002286static int __net_init ipmr_net_init(struct net *net)
2287{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002288 int err;
Benjamin Therycf958ae32009-01-22 04:56:16 +00002289
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002290 err = ipmr_rules_init(net);
2291 if (err < 0)
Benjamin Therycf958ae32009-01-22 04:56:16 +00002292 goto fail;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002293
2294#ifdef CONFIG_PROC_FS
2295 err = -ENOMEM;
2296 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2297 goto proc_vif_fail;
2298 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2299 goto proc_cache_fail;
2300#endif
Benjamin Thery2bb8b262009-01-22 04:56:18 +00002301 return 0;
2302
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002303#ifdef CONFIG_PROC_FS
2304proc_cache_fail:
2305 proc_net_remove(net, "ip_mr_vif");
2306proc_vif_fail:
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002307 ipmr_rules_exit(net);
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002308#endif
Benjamin Therycf958ae32009-01-22 04:56:16 +00002309fail:
2310 return err;
2311}
2312
2313static void __net_exit ipmr_net_exit(struct net *net)
2314{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002315#ifdef CONFIG_PROC_FS
2316 proc_net_remove(net, "ip_mr_cache");
2317 proc_net_remove(net, "ip_mr_vif");
2318#endif
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002319 ipmr_rules_exit(net);
Benjamin Therycf958ae32009-01-22 04:56:16 +00002320}
2321
2322static struct pernet_operations ipmr_net_ops = {
2323 .init = ipmr_net_init,
2324 .exit = ipmr_net_exit,
2325};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002326
Wang Chen03d2f892008-07-03 12:13:36 +08002327int __init ip_mr_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328{
Wang Chen03d2f892008-07-03 12:13:36 +08002329 int err;
2330
Linus Torvalds1da177e2005-04-16 15:20:36 -07002331 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2332 sizeof(struct mfc_cache),
Alexey Dobriyane5d679f2006-08-26 19:25:52 -07002333 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
Paul Mundt20c2df82007-07-20 10:11:58 +09002334 NULL);
Wang Chen03d2f892008-07-03 12:13:36 +08002335 if (!mrt_cachep)
2336 return -ENOMEM;
2337
Benjamin Therycf958ae32009-01-22 04:56:16 +00002338 err = register_pernet_subsys(&ipmr_net_ops);
2339 if (err)
2340 goto reg_pernet_fail;
2341
Wang Chen03d2f892008-07-03 12:13:36 +08002342 err = register_netdevice_notifier(&ip_mr_notifier);
2343 if (err)
2344 goto reg_notif_fail;
Tom Goff403dbb92009-06-14 03:16:13 -07002345#ifdef CONFIG_IP_PIMSM_V2
2346 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2347 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2348 err = -EAGAIN;
2349 goto add_proto_fail;
2350 }
2351#endif
Wang Chen03d2f892008-07-03 12:13:36 +08002352 return 0;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002353
Tom Goff403dbb92009-06-14 03:16:13 -07002354#ifdef CONFIG_IP_PIMSM_V2
2355add_proto_fail:
2356 unregister_netdevice_notifier(&ip_mr_notifier);
2357#endif
Benjamin Theryc3e38892008-11-19 14:07:41 -08002358reg_notif_fail:
Benjamin Therycf958ae32009-01-22 04:56:16 +00002359 unregister_pernet_subsys(&ipmr_net_ops);
2360reg_pernet_fail:
Benjamin Theryc3e38892008-11-19 14:07:41 -08002361 kmem_cache_destroy(mrt_cachep);
Wang Chen03d2f892008-07-03 12:13:36 +08002362 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002363}