blob: d6a28acc0683d0d225f03d1a3da5c40806e18132 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
Alan Cox113aa832008-10-13 19:01:08 -07004 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
Linus Torvalds1da177e2005-04-16 15:20:36 -07005 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070012 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
26 *
27 */
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080032#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080049#include <linux/if_ether.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020050#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include <net/ip.h>
52#include <net/protocol.h>
53#include <linux/skbuff.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020054#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070055#include <net/sock.h>
56#include <net/icmp.h>
57#include <net/udp.h>
58#include <net/raw.h>
59#include <linux/notifier.h>
60#include <linux/if_arp.h>
61#include <linux/netfilter_ipv4.h>
62#include <net/ipip.h>
63#include <net/checksum.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070064#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070065
66#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67#define CONFIG_IP_PIMSM 1
68#endif
69
Linus Torvalds1da177e2005-04-16 15:20:36 -070070/* Big lock, protecting vif table, mrt cache and mroute socket state.
71 Note that the changes are semaphored via rtnl_lock.
72 */
73
74static DEFINE_RWLOCK(mrt_lock);
75
76/*
77 * Multicast router control variables
78 */
79
Benjamin Therycf958ae32009-01-22 04:56:16 +000080#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -070081
Linus Torvalds1da177e2005-04-16 15:20:36 -070082static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
Linus Torvalds1da177e2005-04-16 15:20:36 -070083
84/* Special spinlock for queue of unresolved entries */
85static DEFINE_SPINLOCK(mfc_unres_lock);
86
87/* We return to original Alan's scheme. Hash table of resolved
88 entries is changed only in process context and protected
89 with weak lock mrt_lock. Queue of unresolved entries is protected
90 with strong spinlock mfc_unres_lock.
91
92 In this case data path is free of exclusive locks at all.
93 */
94
Christoph Lametere18b8902006-12-06 20:33:20 -080095static struct kmem_cache *mrt_cachep __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -070096
97static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
98static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
99static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
100
101#ifdef CONFIG_IP_PIMSM_V2
102static struct net_protocol pim_protocol;
103#endif
104
105static struct timer_list ipmr_expire_timer;
106
107/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
108
Wang Chend6070322008-07-14 20:55:26 -0700109static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
110{
111 dev_close(dev);
112
113 dev = __dev_get_by_name(&init_net, "tunl0");
114 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800115 const struct net_device_ops *ops = dev->netdev_ops;
Wang Chend6070322008-07-14 20:55:26 -0700116 struct ifreq ifr;
Wang Chend6070322008-07-14 20:55:26 -0700117 struct ip_tunnel_parm p;
118
119 memset(&p, 0, sizeof(p));
120 p.iph.daddr = v->vifc_rmt_addr.s_addr;
121 p.iph.saddr = v->vifc_lcl_addr.s_addr;
122 p.iph.version = 4;
123 p.iph.ihl = 5;
124 p.iph.protocol = IPPROTO_IPIP;
125 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
126 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
127
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800128 if (ops->ndo_do_ioctl) {
129 mm_segment_t oldfs = get_fs();
130
131 set_fs(KERNEL_DS);
132 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
133 set_fs(oldfs);
134 }
Wang Chend6070322008-07-14 20:55:26 -0700135 }
136}
137
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138static
139struct net_device *ipmr_new_tunnel(struct vifctl *v)
140{
141 struct net_device *dev;
142
Eric W. Biederman881d9662007-09-17 11:56:21 -0700143 dev = __dev_get_by_name(&init_net, "tunl0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144
145 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800146 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147 int err;
148 struct ifreq ifr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700149 struct ip_tunnel_parm p;
150 struct in_device *in_dev;
151
152 memset(&p, 0, sizeof(p));
153 p.iph.daddr = v->vifc_rmt_addr.s_addr;
154 p.iph.saddr = v->vifc_lcl_addr.s_addr;
155 p.iph.version = 4;
156 p.iph.ihl = 5;
157 p.iph.protocol = IPPROTO_IPIP;
158 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
Stephen Hemmingerba93ef72008-01-21 17:28:59 -0800159 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800161 if (ops->ndo_do_ioctl) {
162 mm_segment_t oldfs = get_fs();
163
164 set_fs(KERNEL_DS);
165 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
166 set_fs(oldfs);
167 } else
168 err = -EOPNOTSUPP;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169
170 dev = NULL;
171
Eric W. Biederman881d9662007-09-17 11:56:21 -0700172 if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173 dev->flags |= IFF_MULTICAST;
174
Herbert Xue5ed6392005-10-03 14:35:55 -0700175 in_dev = __in_dev_get_rtnl(dev);
Herbert Xu71e27da2007-06-04 23:36:06 -0700176 if (in_dev == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700178
179 ipv4_devconf_setall(in_dev);
180 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181
182 if (dev_open(dev))
183 goto failure;
Wang Chen7dc00c82008-07-14 20:56:34 -0700184 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700185 }
186 }
187 return dev;
188
189failure:
190 /* allow the register to be completed before unregistering. */
191 rtnl_unlock();
192 rtnl_lock();
193
194 unregister_netdevice(dev);
195 return NULL;
196}
197
198#ifdef CONFIG_IP_PIMSM
199
200static int reg_vif_num = -1;
201
202static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
203{
204 read_lock(&mrt_lock);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700205 dev->stats.tx_bytes += skb->len;
206 dev->stats.tx_packets++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
208 read_unlock(&mrt_lock);
209 kfree_skb(skb);
210 return 0;
211}
212
Stephen Hemminger007c3832008-11-20 20:28:35 -0800213static const struct net_device_ops reg_vif_netdev_ops = {
214 .ndo_start_xmit = reg_vif_xmit,
215};
216
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217static void reg_vif_setup(struct net_device *dev)
218{
219 dev->type = ARPHRD_PIMREG;
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800220 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221 dev->flags = IFF_NOARP;
Stephen Hemminger007c3832008-11-20 20:28:35 -0800222 dev->netdev_ops = &reg_vif_netdev_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223 dev->destructor = free_netdev;
224}
225
226static struct net_device *ipmr_reg_vif(void)
227{
228 struct net_device *dev;
229 struct in_device *in_dev;
230
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700231 dev = alloc_netdev(0, "pimreg", reg_vif_setup);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232
233 if (dev == NULL)
234 return NULL;
235
236 if (register_netdevice(dev)) {
237 free_netdev(dev);
238 return NULL;
239 }
240 dev->iflink = 0;
241
Herbert Xu71e27da2007-06-04 23:36:06 -0700242 rcu_read_lock();
243 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
244 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700246 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247
Herbert Xu71e27da2007-06-04 23:36:06 -0700248 ipv4_devconf_setall(in_dev);
249 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
250 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251
252 if (dev_open(dev))
253 goto failure;
254
Wang Chen7dc00c82008-07-14 20:56:34 -0700255 dev_hold(dev);
256
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257 return dev;
258
259failure:
260 /* allow the register to be completed before unregistering. */
261 rtnl_unlock();
262 rtnl_lock();
263
264 unregister_netdevice(dev);
265 return NULL;
266}
267#endif
268
269/*
270 * Delete a VIF entry
Wang Chen7dc00c82008-07-14 20:56:34 -0700271 * @notify: Set to 1, if the caller is a notifier_call
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900273
Wang Chen7dc00c82008-07-14 20:56:34 -0700274static int vif_delete(int vifi, int notify)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275{
276 struct vif_device *v;
277 struct net_device *dev;
278 struct in_device *in_dev;
279
Benjamin Therycf958ae32009-01-22 04:56:16 +0000280 if (vifi < 0 || vifi >= init_net.ipv4.maxvif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281 return -EADDRNOTAVAIL;
282
Benjamin Therycf958ae32009-01-22 04:56:16 +0000283 v = &init_net.ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284
285 write_lock_bh(&mrt_lock);
286 dev = v->dev;
287 v->dev = NULL;
288
289 if (!dev) {
290 write_unlock_bh(&mrt_lock);
291 return -EADDRNOTAVAIL;
292 }
293
294#ifdef CONFIG_IP_PIMSM
295 if (vifi == reg_vif_num)
296 reg_vif_num = -1;
297#endif
298
Benjamin Therycf958ae32009-01-22 04:56:16 +0000299 if (vifi+1 == init_net.ipv4.maxvif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300 int tmp;
301 for (tmp=vifi-1; tmp>=0; tmp--) {
Benjamin Therycf958ae32009-01-22 04:56:16 +0000302 if (VIF_EXISTS(&init_net, tmp))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 break;
304 }
Benjamin Therycf958ae32009-01-22 04:56:16 +0000305 init_net.ipv4.maxvif = tmp+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306 }
307
308 write_unlock_bh(&mrt_lock);
309
310 dev_set_allmulti(dev, -1);
311
Herbert Xue5ed6392005-10-03 14:35:55 -0700312 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
Herbert Xu42f811b2007-06-04 23:34:44 -0700313 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314 ip_rt_multicast_event(in_dev);
315 }
316
Wang Chen7dc00c82008-07-14 20:56:34 -0700317 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318 unregister_netdevice(dev);
319
320 dev_put(dev);
321 return 0;
322}
323
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000324static inline void ipmr_cache_free(struct mfc_cache *c)
325{
326 release_net(mfc_net(c));
327 kmem_cache_free(mrt_cachep, c);
328}
329
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330/* Destroy an unresolved cache entry, killing queued skbs
331 and reporting error to netlink readers.
332 */
333
334static void ipmr_destroy_unres(struct mfc_cache *c)
335{
336 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700337 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000339 atomic_dec(&init_net.ipv4.cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340
Jianjun Kongc354e122008-11-03 00:28:02 -0800341 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700342 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
344 nlh->nlmsg_type = NLMSG_ERROR;
345 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
346 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700347 e = NLMSG_DATA(nlh);
348 e->error = -ETIMEDOUT;
349 memset(&e->msg, 0, sizeof(e->msg));
Thomas Graf2942e902006-08-15 00:30:25 -0700350
Denis V. Lunev97c53ca2007-11-19 22:26:51 -0800351 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352 } else
353 kfree_skb(skb);
354 }
355
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000356 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357}
358
359
360/* Single timer process for all the unresolved queue. */
361
362static void ipmr_expire_process(unsigned long dummy)
363{
364 unsigned long now;
365 unsigned long expires;
366 struct mfc_cache *c, **cp;
367
368 if (!spin_trylock(&mfc_unres_lock)) {
369 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
370 return;
371 }
372
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000373 if (mfc_unres_queue == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374 goto out;
375
376 now = jiffies;
377 expires = 10*HZ;
378 cp = &mfc_unres_queue;
379
380 while ((c=*cp) != NULL) {
381 if (time_after(c->mfc_un.unres.expires, now)) {
382 unsigned long interval = c->mfc_un.unres.expires - now;
383 if (interval < expires)
384 expires = interval;
385 cp = &c->next;
386 continue;
387 }
388
389 *cp = c->next;
390
391 ipmr_destroy_unres(c);
392 }
393
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000394 if (mfc_unres_queue != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 mod_timer(&ipmr_expire_timer, jiffies + expires);
396
397out:
398 spin_unlock(&mfc_unres_lock);
399}
400
401/* Fill oifs list. It is called under write locked mrt_lock. */
402
Baruch Evend1b04c02005-07-30 17:41:59 -0700403static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404{
405 int vifi;
406
407 cache->mfc_un.res.minvif = MAXVIFS;
408 cache->mfc_un.res.maxvif = 0;
409 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
410
Benjamin Therycf958ae32009-01-22 04:56:16 +0000411 for (vifi = 0; vifi < init_net.ipv4.maxvif; vifi++) {
412 if (VIF_EXISTS(&init_net, vifi) &&
413 ttls[vifi] && ttls[vifi] < 255) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
415 if (cache->mfc_un.res.minvif > vifi)
416 cache->mfc_un.res.minvif = vifi;
417 if (cache->mfc_un.res.maxvif <= vifi)
418 cache->mfc_un.res.maxvif = vifi + 1;
419 }
420 }
421}
422
423static int vif_add(struct vifctl *vifc, int mrtsock)
424{
425 int vifi = vifc->vifc_vifi;
Benjamin Therycf958ae32009-01-22 04:56:16 +0000426 struct vif_device *v = &init_net.ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427 struct net_device *dev;
428 struct in_device *in_dev;
Wang Chend6070322008-07-14 20:55:26 -0700429 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430
431 /* Is vif busy ? */
Benjamin Therycf958ae32009-01-22 04:56:16 +0000432 if (VIF_EXISTS(&init_net, vifi))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 return -EADDRINUSE;
434
435 switch (vifc->vifc_flags) {
436#ifdef CONFIG_IP_PIMSM
437 case VIFF_REGISTER:
438 /*
439 * Special Purpose VIF in PIM
440 * All the packets will be sent to the daemon
441 */
442 if (reg_vif_num >= 0)
443 return -EADDRINUSE;
444 dev = ipmr_reg_vif();
445 if (!dev)
446 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700447 err = dev_set_allmulti(dev, 1);
448 if (err) {
449 unregister_netdevice(dev);
Wang Chen7dc00c82008-07-14 20:56:34 -0700450 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700451 return err;
452 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453 break;
454#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900455 case VIFF_TUNNEL:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456 dev = ipmr_new_tunnel(vifc);
457 if (!dev)
458 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700459 err = dev_set_allmulti(dev, 1);
460 if (err) {
461 ipmr_del_tunnel(dev, vifc);
Wang Chen7dc00c82008-07-14 20:56:34 -0700462 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700463 return err;
464 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 break;
466 case 0:
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800467 dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468 if (!dev)
469 return -EADDRNOTAVAIL;
Wang Chend6070322008-07-14 20:55:26 -0700470 err = dev_set_allmulti(dev, 1);
Wang Chen7dc00c82008-07-14 20:56:34 -0700471 if (err) {
472 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700473 return err;
Wang Chen7dc00c82008-07-14 20:56:34 -0700474 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475 break;
476 default:
477 return -EINVAL;
478 }
479
Herbert Xue5ed6392005-10-03 14:35:55 -0700480 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481 return -EADDRNOTAVAIL;
Herbert Xu42f811b2007-06-04 23:34:44 -0700482 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483 ip_rt_multicast_event(in_dev);
484
485 /*
486 * Fill in the VIF structures
487 */
Jianjun Kongc354e122008-11-03 00:28:02 -0800488 v->rate_limit = vifc->vifc_rate_limit;
489 v->local = vifc->vifc_lcl_addr.s_addr;
490 v->remote = vifc->vifc_rmt_addr.s_addr;
491 v->flags = vifc->vifc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492 if (!mrtsock)
493 v->flags |= VIFF_STATIC;
Jianjun Kongc354e122008-11-03 00:28:02 -0800494 v->threshold = vifc->vifc_threshold;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495 v->bytes_in = 0;
496 v->bytes_out = 0;
497 v->pkt_in = 0;
498 v->pkt_out = 0;
499 v->link = dev->ifindex;
500 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
501 v->link = dev->iflink;
502
503 /* And finish update writing critical data */
504 write_lock_bh(&mrt_lock);
Jianjun Kongc354e122008-11-03 00:28:02 -0800505 v->dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506#ifdef CONFIG_IP_PIMSM
507 if (v->flags&VIFF_REGISTER)
508 reg_vif_num = vifi;
509#endif
Benjamin Therycf958ae32009-01-22 04:56:16 +0000510 if (vifi+1 > init_net.ipv4.maxvif)
511 init_net.ipv4.maxvif = vifi+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512 write_unlock_bh(&mrt_lock);
513 return 0;
514}
515
Al Viro114c7842006-09-27 18:39:29 -0700516static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517{
Jianjun Kongc354e122008-11-03 00:28:02 -0800518 int line = MFC_HASH(mcastgrp, origin);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519 struct mfc_cache *c;
520
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000521 for (c = init_net.ipv4.mfc_cache_array[line]; c; c = c->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
523 break;
524 }
525 return c;
526}
527
528/*
529 * Allocate a multicast cache entry
530 */
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000531static struct mfc_cache *ipmr_cache_alloc(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532{
Jianjun Kongc354e122008-11-03 00:28:02 -0800533 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
534 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536 c->mfc_un.res.minvif = MAXVIFS;
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000537 mfc_net_set(c, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538 return c;
539}
540
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000541static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542{
Jianjun Kongc354e122008-11-03 00:28:02 -0800543 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
544 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546 skb_queue_head_init(&c->mfc_un.unres.unresolved);
547 c->mfc_un.unres.expires = jiffies + 10*HZ;
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000548 mfc_net_set(c, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 return c;
550}
551
552/*
553 * A cache entry has gone into a resolved state from queued
554 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900555
Linus Torvalds1da177e2005-04-16 15:20:36 -0700556static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
557{
558 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700559 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700560
561 /*
562 * Play the pending entries through our router
563 */
564
Jianjun Kongc354e122008-11-03 00:28:02 -0800565 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700566 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
568
569 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700570 nlh->nlmsg_len = (skb_tail_pointer(skb) -
571 (u8 *)nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572 } else {
573 nlh->nlmsg_type = NLMSG_ERROR;
574 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
575 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700576 e = NLMSG_DATA(nlh);
577 e->error = -EMSGSIZE;
578 memset(&e->msg, 0, sizeof(e->msg));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579 }
Thomas Graf2942e902006-08-15 00:30:25 -0700580
Denis V. Lunev97c53ca2007-11-19 22:26:51 -0800581 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582 } else
583 ip_mr_forward(skb, c, 0);
584 }
585}
586
587/*
588 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
589 * expects the following bizarre scheme.
590 *
591 * Called under mrt_lock.
592 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900593
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
595{
596 struct sk_buff *skb;
Arnaldo Carvalho de Meloc9bdd4b2007-03-12 20:09:15 -0300597 const int ihl = ip_hdrlen(pkt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598 struct igmphdr *igmp;
599 struct igmpmsg *msg;
600 int ret;
601
602#ifdef CONFIG_IP_PIMSM
603 if (assert == IGMPMSG_WHOLEPKT)
604 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
605 else
606#endif
607 skb = alloc_skb(128, GFP_ATOMIC);
608
Stephen Hemminger132adf52007-03-08 20:44:43 -0800609 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700610 return -ENOBUFS;
611
612#ifdef CONFIG_IP_PIMSM
613 if (assert == IGMPMSG_WHOLEPKT) {
614 /* Ugly, but we have no choice with this interface.
615 Duplicate old header, fix ihl, length etc.
616 And all this only to mangle msg->im_msgtype and
617 to set msg->im_mbz to "mbz" :-)
618 */
Arnaldo Carvalho de Melo878c8142007-03-11 22:38:29 -0300619 skb_push(skb, sizeof(struct iphdr));
620 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -0300621 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melo0272ffc2007-03-12 20:05:39 -0300622 msg = (struct igmpmsg *)skb_network_header(skb);
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -0700623 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624 msg->im_msgtype = IGMPMSG_WHOLEPKT;
625 msg->im_mbz = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900626 msg->im_vif = reg_vif_num;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700627 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
628 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
629 sizeof(struct iphdr));
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900630 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900632 {
633
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634 /*
635 * Copy the IP header
636 */
637
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700638 skb->network_header = skb->tail;
Arnaldo Carvalho de Meloddc7b8e2007-03-15 21:42:27 -0300639 skb_put(skb, ihl);
Arnaldo Carvalho de Melo27d7ff42007-03-31 11:55:19 -0300640 skb_copy_to_linear_data(skb, pkt->data, ihl);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700641 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
642 msg = (struct igmpmsg *)skb_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643 msg->im_vif = vifi;
644 skb->dst = dst_clone(pkt->dst);
645
646 /*
647 * Add our header
648 */
649
Jianjun Kongc354e122008-11-03 00:28:02 -0800650 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651 igmp->type =
652 msg->im_msgtype = assert;
653 igmp->code = 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700654 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700655 skb->transport_header = skb->network_header;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900656 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657
Benjamin Thery70a269e2009-01-22 04:56:15 +0000658 if (init_net.ipv4.mroute_sk == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659 kfree_skb(skb);
660 return -EINVAL;
661 }
662
663 /*
664 * Deliver to mrouted
665 */
Benjamin Thery70a269e2009-01-22 04:56:15 +0000666 ret = sock_queue_rcv_skb(init_net.ipv4.mroute_sk, skb);
667 if (ret < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 if (net_ratelimit())
669 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
670 kfree_skb(skb);
671 }
672
673 return ret;
674}
675
676/*
677 * Queue a packet for resolution. It gets locked cache entry!
678 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900679
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680static int
681ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
682{
683 int err;
684 struct mfc_cache *c;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700685 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686
687 spin_lock_bh(&mfc_unres_lock);
688 for (c=mfc_unres_queue; c; c=c->next) {
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000689 if (net_eq(mfc_net(c), &init_net) &&
690 c->mfc_mcastgrp == iph->daddr &&
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700691 c->mfc_origin == iph->saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692 break;
693 }
694
695 if (c == NULL) {
696 /*
697 * Create a new entry if allowable
698 */
699
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000700 if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) >= 10 ||
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000701 (c = ipmr_cache_alloc_unres(&init_net)) == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702 spin_unlock_bh(&mfc_unres_lock);
703
704 kfree_skb(skb);
705 return -ENOBUFS;
706 }
707
708 /*
709 * Fill in the new cache entry
710 */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700711 c->mfc_parent = -1;
712 c->mfc_origin = iph->saddr;
713 c->mfc_mcastgrp = iph->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714
715 /*
716 * Reflect first query at mrouted.
717 */
718 if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900719 /* If the report failed throw the cache entry
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720 out - Brad Parker
721 */
722 spin_unlock_bh(&mfc_unres_lock);
723
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000724 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725 kfree_skb(skb);
726 return err;
727 }
728
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000729 atomic_inc(&init_net.ipv4.cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 c->next = mfc_unres_queue;
731 mfc_unres_queue = c;
732
733 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
734 }
735
736 /*
737 * See if we can append the packet
738 */
739 if (c->mfc_un.unres.unresolved.qlen>3) {
740 kfree_skb(skb);
741 err = -ENOBUFS;
742 } else {
Jianjun Kongc354e122008-11-03 00:28:02 -0800743 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744 err = 0;
745 }
746
747 spin_unlock_bh(&mfc_unres_lock);
748 return err;
749}
750
751/*
752 * MFC cache manipulation by user space mroute daemon
753 */
754
755static int ipmr_mfc_delete(struct mfcctl *mfc)
756{
757 int line;
758 struct mfc_cache *c, **cp;
759
Jianjun Kongc354e122008-11-03 00:28:02 -0800760 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000762 for (cp = &init_net.ipv4.mfc_cache_array[line];
763 (c = *cp) != NULL; cp = &c->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
765 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
766 write_lock_bh(&mrt_lock);
767 *cp = c->next;
768 write_unlock_bh(&mrt_lock);
769
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000770 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700771 return 0;
772 }
773 }
774 return -ENOENT;
775}
776
777static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
778{
779 int line;
780 struct mfc_cache *uc, *c, **cp;
781
Jianjun Kongc354e122008-11-03 00:28:02 -0800782 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000784 for (cp = &init_net.ipv4.mfc_cache_array[line];
785 (c = *cp) != NULL; cp = &c->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
787 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
788 break;
789 }
790
791 if (c != NULL) {
792 write_lock_bh(&mrt_lock);
793 c->mfc_parent = mfc->mfcc_parent;
Baruch Evend1b04c02005-07-30 17:41:59 -0700794 ipmr_update_thresholds(c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795 if (!mrtsock)
796 c->mfc_flags |= MFC_STATIC;
797 write_unlock_bh(&mrt_lock);
798 return 0;
799 }
800
Joe Perchesf97c1e02007-12-16 13:45:43 -0800801 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802 return -EINVAL;
803
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000804 c = ipmr_cache_alloc(&init_net);
Jianjun Kongc354e122008-11-03 00:28:02 -0800805 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806 return -ENOMEM;
807
Jianjun Kongc354e122008-11-03 00:28:02 -0800808 c->mfc_origin = mfc->mfcc_origin.s_addr;
809 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
810 c->mfc_parent = mfc->mfcc_parent;
Baruch Evend1b04c02005-07-30 17:41:59 -0700811 ipmr_update_thresholds(c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700812 if (!mrtsock)
813 c->mfc_flags |= MFC_STATIC;
814
815 write_lock_bh(&mrt_lock);
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000816 c->next = init_net.ipv4.mfc_cache_array[line];
817 init_net.ipv4.mfc_cache_array[line] = c;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700818 write_unlock_bh(&mrt_lock);
819
820 /*
821 * Check to see if we resolved a queued list. If so we
822 * need to send on the frames and tidy up.
823 */
824 spin_lock_bh(&mfc_unres_lock);
825 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
826 cp = &uc->next) {
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000827 if (net_eq(mfc_net(uc), &init_net) &&
828 uc->mfc_origin == c->mfc_origin &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700829 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
830 *cp = uc->next;
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000831 atomic_dec(&init_net.ipv4.cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832 break;
833 }
834 }
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000835 if (mfc_unres_queue == NULL)
836 del_timer(&ipmr_expire_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837 spin_unlock_bh(&mfc_unres_lock);
838
839 if (uc) {
840 ipmr_cache_resolve(uc, c);
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000841 ipmr_cache_free(uc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700842 }
843 return 0;
844}
845
846/*
847 * Close the multicast socket, and clear the vif tables etc
848 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900849
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850static void mroute_clean_tables(struct sock *sk)
851{
852 int i;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900853
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854 /*
855 * Shut down all active vif entries
856 */
Benjamin Therycf958ae32009-01-22 04:56:16 +0000857 for (i = 0; i < init_net.ipv4.maxvif; i++) {
858 if (!(init_net.ipv4.vif_table[i].flags&VIFF_STATIC))
Wang Chen7dc00c82008-07-14 20:56:34 -0700859 vif_delete(i, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860 }
861
862 /*
863 * Wipe the cache
864 */
Jianjun Kongc354e122008-11-03 00:28:02 -0800865 for (i=0; i<MFC_LINES; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700866 struct mfc_cache *c, **cp;
867
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000868 cp = &init_net.ipv4.mfc_cache_array[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869 while ((c = *cp) != NULL) {
870 if (c->mfc_flags&MFC_STATIC) {
871 cp = &c->next;
872 continue;
873 }
874 write_lock_bh(&mrt_lock);
875 *cp = c->next;
876 write_unlock_bh(&mrt_lock);
877
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000878 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879 }
880 }
881
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000882 if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) != 0) {
883 struct mfc_cache *c, **cp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884
885 spin_lock_bh(&mfc_unres_lock);
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000886 cp = &mfc_unres_queue;
887 while ((c = *cp) != NULL) {
888 if (!net_eq(mfc_net(c), &init_net)) {
889 cp = &c->next;
890 continue;
891 }
892 *cp = c->next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893
894 ipmr_destroy_unres(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 }
896 spin_unlock_bh(&mfc_unres_lock);
897 }
898}
899
900static void mrtsock_destruct(struct sock *sk)
901{
902 rtnl_lock();
Benjamin Thery70a269e2009-01-22 04:56:15 +0000903 if (sk == init_net.ipv4.mroute_sk) {
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900904 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905
906 write_lock_bh(&mrt_lock);
Benjamin Thery70a269e2009-01-22 04:56:15 +0000907 init_net.ipv4.mroute_sk = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908 write_unlock_bh(&mrt_lock);
909
910 mroute_clean_tables(sk);
911 }
912 rtnl_unlock();
913}
914
915/*
916 * Socket options and virtual interface manipulation. The whole
917 * virtual interface system is a complete heap, but unfortunately
918 * that's how BSD mrouted happens to think. Maybe one day with a proper
919 * MOSPF/PIM router set up we can clean this up.
920 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900921
Jianjun Kongc354e122008-11-03 00:28:02 -0800922int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923{
924 int ret;
925 struct vifctl vif;
926 struct mfcctl mfc;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900927
Stephen Hemminger132adf52007-03-08 20:44:43 -0800928 if (optname != MRT_INIT) {
Benjamin Thery70a269e2009-01-22 04:56:15 +0000929 if (sk != init_net.ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930 return -EACCES;
931 }
932
Stephen Hemminger132adf52007-03-08 20:44:43 -0800933 switch (optname) {
934 case MRT_INIT:
935 if (sk->sk_type != SOCK_RAW ||
936 inet_sk(sk)->num != IPPROTO_IGMP)
937 return -EOPNOTSUPP;
Jianjun Kongc354e122008-11-03 00:28:02 -0800938 if (optlen != sizeof(int))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800939 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700940
Stephen Hemminger132adf52007-03-08 20:44:43 -0800941 rtnl_lock();
Benjamin Thery70a269e2009-01-22 04:56:15 +0000942 if (init_net.ipv4.mroute_sk) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943 rtnl_unlock();
Stephen Hemminger132adf52007-03-08 20:44:43 -0800944 return -EADDRINUSE;
945 }
946
947 ret = ip_ra_control(sk, 1, mrtsock_destruct);
948 if (ret == 0) {
949 write_lock_bh(&mrt_lock);
Benjamin Thery70a269e2009-01-22 04:56:15 +0000950 init_net.ipv4.mroute_sk = sk;
Stephen Hemminger132adf52007-03-08 20:44:43 -0800951 write_unlock_bh(&mrt_lock);
952
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900953 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
Stephen Hemminger132adf52007-03-08 20:44:43 -0800954 }
955 rtnl_unlock();
956 return ret;
957 case MRT_DONE:
Benjamin Thery70a269e2009-01-22 04:56:15 +0000958 if (sk != init_net.ipv4.mroute_sk)
Stephen Hemminger132adf52007-03-08 20:44:43 -0800959 return -EACCES;
960 return ip_ra_control(sk, 0, NULL);
961 case MRT_ADD_VIF:
962 case MRT_DEL_VIF:
Jianjun Kongc354e122008-11-03 00:28:02 -0800963 if (optlen != sizeof(vif))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800964 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -0800965 if (copy_from_user(&vif, optval, sizeof(vif)))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800966 return -EFAULT;
967 if (vif.vifc_vifi >= MAXVIFS)
968 return -ENFILE;
969 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -0800970 if (optname == MRT_ADD_VIF) {
Benjamin Thery70a269e2009-01-22 04:56:15 +0000971 ret = vif_add(&vif, sk == init_net.ipv4.mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800972 } else {
Wang Chen7dc00c82008-07-14 20:56:34 -0700973 ret = vif_delete(vif.vifc_vifi, 0);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800974 }
975 rtnl_unlock();
976 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977
978 /*
979 * Manipulate the forwarding caches. These live
980 * in a sort of kernel/user symbiosis.
981 */
Stephen Hemminger132adf52007-03-08 20:44:43 -0800982 case MRT_ADD_MFC:
983 case MRT_DEL_MFC:
Jianjun Kongc354e122008-11-03 00:28:02 -0800984 if (optlen != sizeof(mfc))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800985 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -0800986 if (copy_from_user(&mfc, optval, sizeof(mfc)))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800987 return -EFAULT;
988 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -0800989 if (optname == MRT_DEL_MFC)
Stephen Hemminger132adf52007-03-08 20:44:43 -0800990 ret = ipmr_mfc_delete(&mfc);
991 else
Benjamin Thery70a269e2009-01-22 04:56:15 +0000992 ret = ipmr_mfc_add(&mfc, sk == init_net.ipv4.mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800993 rtnl_unlock();
994 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995 /*
996 * Control PIM assert.
997 */
Stephen Hemminger132adf52007-03-08 20:44:43 -0800998 case MRT_ASSERT:
999 {
1000 int v;
1001 if (get_user(v,(int __user *)optval))
1002 return -EFAULT;
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001003 init_net.ipv4.mroute_do_assert = (v) ? 1 : 0;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001004 return 0;
1005 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006#ifdef CONFIG_IP_PIMSM
Stephen Hemminger132adf52007-03-08 20:44:43 -08001007 case MRT_PIM:
1008 {
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001009 int v;
1010
Stephen Hemminger132adf52007-03-08 20:44:43 -08001011 if (get_user(v,(int __user *)optval))
1012 return -EFAULT;
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001013 v = (v) ? 1 : 0;
1014
Stephen Hemminger132adf52007-03-08 20:44:43 -08001015 rtnl_lock();
1016 ret = 0;
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001017 if (v != init_net.ipv4.mroute_do_pim) {
1018 init_net.ipv4.mroute_do_pim = v;
1019 init_net.ipv4.mroute_do_assert = v;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020#ifdef CONFIG_IP_PIMSM_V2
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001021 if (init_net.ipv4.mroute_do_pim)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001022 ret = inet_add_protocol(&pim_protocol,
1023 IPPROTO_PIM);
1024 else
1025 ret = inet_del_protocol(&pim_protocol,
1026 IPPROTO_PIM);
1027 if (ret < 0)
1028 ret = -EAGAIN;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030 }
Stephen Hemminger132adf52007-03-08 20:44:43 -08001031 rtnl_unlock();
1032 return ret;
1033 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034#endif
Stephen Hemminger132adf52007-03-08 20:44:43 -08001035 /*
1036 * Spurious command, or MRT_VERSION which you cannot
1037 * set.
1038 */
1039 default:
1040 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041 }
1042}
1043
1044/*
1045 * Getsock opt support for the multicast routing system.
1046 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001047
Jianjun Kongc354e122008-11-03 00:28:02 -08001048int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001049{
1050 int olr;
1051 int val;
1052
Jianjun Kongc354e122008-11-03 00:28:02 -08001053 if (optname != MRT_VERSION &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001054#ifdef CONFIG_IP_PIMSM
1055 optname!=MRT_PIM &&
1056#endif
1057 optname!=MRT_ASSERT)
1058 return -ENOPROTOOPT;
1059
1060 if (get_user(olr, optlen))
1061 return -EFAULT;
1062
1063 olr = min_t(unsigned int, olr, sizeof(int));
1064 if (olr < 0)
1065 return -EINVAL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001066
Jianjun Kongc354e122008-11-03 00:28:02 -08001067 if (put_user(olr, optlen))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068 return -EFAULT;
Jianjun Kongc354e122008-11-03 00:28:02 -08001069 if (optname == MRT_VERSION)
1070 val = 0x0305;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071#ifdef CONFIG_IP_PIMSM
Jianjun Kongc354e122008-11-03 00:28:02 -08001072 else if (optname == MRT_PIM)
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001073 val = init_net.ipv4.mroute_do_pim;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001074#endif
1075 else
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001076 val = init_net.ipv4.mroute_do_assert;
Jianjun Kongc354e122008-11-03 00:28:02 -08001077 if (copy_to_user(optval, &val, olr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078 return -EFAULT;
1079 return 0;
1080}
1081
1082/*
1083 * The IP multicast ioctl support routines.
1084 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001085
Linus Torvalds1da177e2005-04-16 15:20:36 -07001086int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1087{
1088 struct sioc_sg_req sr;
1089 struct sioc_vif_req vr;
1090 struct vif_device *vif;
1091 struct mfc_cache *c;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001092
Stephen Hemminger132adf52007-03-08 20:44:43 -08001093 switch (cmd) {
1094 case SIOCGETVIFCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001095 if (copy_from_user(&vr, arg, sizeof(vr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001096 return -EFAULT;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001097 if (vr.vifi >= init_net.ipv4.maxvif)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001098 return -EINVAL;
1099 read_lock(&mrt_lock);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001100 vif = &init_net.ipv4.vif_table[vr.vifi];
1101 if (VIF_EXISTS(&init_net, vr.vifi)) {
Jianjun Kongc354e122008-11-03 00:28:02 -08001102 vr.icount = vif->pkt_in;
1103 vr.ocount = vif->pkt_out;
1104 vr.ibytes = vif->bytes_in;
1105 vr.obytes = vif->bytes_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001107
Jianjun Kongc354e122008-11-03 00:28:02 -08001108 if (copy_to_user(arg, &vr, sizeof(vr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109 return -EFAULT;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001110 return 0;
1111 }
1112 read_unlock(&mrt_lock);
1113 return -EADDRNOTAVAIL;
1114 case SIOCGETSGCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001115 if (copy_from_user(&sr, arg, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001116 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117
Stephen Hemminger132adf52007-03-08 20:44:43 -08001118 read_lock(&mrt_lock);
1119 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1120 if (c) {
1121 sr.pktcnt = c->mfc_un.res.pkt;
1122 sr.bytecnt = c->mfc_un.res.bytes;
1123 sr.wrong_if = c->mfc_un.res.wrong_if;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001125
Jianjun Kongc354e122008-11-03 00:28:02 -08001126 if (copy_to_user(arg, &sr, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001127 return -EFAULT;
1128 return 0;
1129 }
1130 read_unlock(&mrt_lock);
1131 return -EADDRNOTAVAIL;
1132 default:
1133 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001134 }
1135}
1136
1137
1138static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1139{
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001140 struct net_device *dev = ptr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141 struct vif_device *v;
1142 int ct;
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001143
YOSHIFUJI Hideaki721499e2008-07-19 22:34:43 -07001144 if (!net_eq(dev_net(dev), &init_net))
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001145 return NOTIFY_DONE;
1146
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 if (event != NETDEV_UNREGISTER)
1148 return NOTIFY_DONE;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001149 v = &init_net.ipv4.vif_table[0];
1150 for (ct = 0; ct < init_net.ipv4.maxvif; ct++, v++) {
Jianjun Kongc354e122008-11-03 00:28:02 -08001151 if (v->dev == dev)
Wang Chen7dc00c82008-07-14 20:56:34 -07001152 vif_delete(ct, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153 }
1154 return NOTIFY_DONE;
1155}
1156
1157
Jianjun Kongc354e122008-11-03 00:28:02 -08001158static struct notifier_block ip_mr_notifier = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001159 .notifier_call = ipmr_device_event,
1160};
1161
1162/*
1163 * Encapsulate a packet by attaching a valid IPIP header to it.
1164 * This avoids tunnel drivers and other mess and gives us the speed so
1165 * important for multicast video.
1166 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001167
Al Viro114c7842006-09-27 18:39:29 -07001168static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001169{
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001170 struct iphdr *iph;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001171 struct iphdr *old_iph = ip_hdr(skb);
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001172
1173 skb_push(skb, sizeof(struct iphdr));
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001174 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001175 skb_reset_network_header(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001176 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177
1178 iph->version = 4;
Arnaldo Carvalho de Meloe023dd62007-03-12 20:09:36 -03001179 iph->tos = old_iph->tos;
1180 iph->ttl = old_iph->ttl;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001181 iph->frag_off = 0;
1182 iph->daddr = daddr;
1183 iph->saddr = saddr;
1184 iph->protocol = IPPROTO_IPIP;
1185 iph->ihl = 5;
1186 iph->tot_len = htons(skb->len);
1187 ip_select_ident(iph, skb->dst, NULL);
1188 ip_send_check(iph);
1189
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1191 nf_reset(skb);
1192}
1193
1194static inline int ipmr_forward_finish(struct sk_buff *skb)
1195{
1196 struct ip_options * opt = &(IPCB(skb)->opt);
1197
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -07001198 IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199
1200 if (unlikely(opt->optlen))
1201 ip_forward_options(skb);
1202
1203 return dst_output(skb);
1204}
1205
1206/*
1207 * Processing handlers for ipmr_forward
1208 */
1209
1210static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1211{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001212 const struct iphdr *iph = ip_hdr(skb);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001213 struct vif_device *vif = &init_net.ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214 struct net_device *dev;
1215 struct rtable *rt;
1216 int encap = 0;
1217
1218 if (vif->dev == NULL)
1219 goto out_free;
1220
1221#ifdef CONFIG_IP_PIMSM
1222 if (vif->flags & VIFF_REGISTER) {
1223 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001224 vif->bytes_out += skb->len;
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001225 vif->dev->stats.tx_bytes += skb->len;
1226 vif->dev->stats.tx_packets++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1228 kfree_skb(skb);
1229 return;
1230 }
1231#endif
1232
1233 if (vif->flags&VIFF_TUNNEL) {
1234 struct flowi fl = { .oif = vif->link,
1235 .nl_u = { .ip4_u =
1236 { .daddr = vif->remote,
1237 .saddr = vif->local,
1238 .tos = RT_TOS(iph->tos) } },
1239 .proto = IPPROTO_IPIP };
Denis V. Lunevf2063512008-01-22 22:07:34 -08001240 if (ip_route_output_key(&init_net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241 goto out_free;
1242 encap = sizeof(struct iphdr);
1243 } else {
1244 struct flowi fl = { .oif = vif->link,
1245 .nl_u = { .ip4_u =
1246 { .daddr = iph->daddr,
1247 .tos = RT_TOS(iph->tos) } },
1248 .proto = IPPROTO_IPIP };
Denis V. Lunevf2063512008-01-22 22:07:34 -08001249 if (ip_route_output_key(&init_net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250 goto out_free;
1251 }
1252
1253 dev = rt->u.dst.dev;
1254
1255 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1256 /* Do not fragment multicasts. Alas, IPv4 does not
1257 allow to send ICMP, so that packets will disappear
1258 to blackhole.
1259 */
1260
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -07001261 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262 ip_rt_put(rt);
1263 goto out_free;
1264 }
1265
1266 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1267
1268 if (skb_cow(skb, encap)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001269 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270 goto out_free;
1271 }
1272
1273 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001274 vif->bytes_out += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275
1276 dst_release(skb->dst);
1277 skb->dst = &rt->u.dst;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001278 ip_decrease_ttl(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279
1280 /* FIXME: forward and output firewalls used to be called here.
1281 * What do we do with netfilter? -- RR */
1282 if (vif->flags & VIFF_TUNNEL) {
1283 ip_encap(skb, vif->local, vif->remote);
1284 /* FIXME: extra output firewall step used to be here. --RR */
Pavel Emelyanov2f4c02d2008-05-21 14:16:14 -07001285 vif->dev->stats.tx_packets++;
1286 vif->dev->stats.tx_bytes += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001287 }
1288
1289 IPCB(skb)->flags |= IPSKB_FORWARDED;
1290
1291 /*
1292 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1293 * not only before forwarding, but after forwarding on all output
1294 * interfaces. It is clear, if mrouter runs a multicasting
1295 * program, it should receive packets not depending to what interface
1296 * program is joined.
1297 * If we will not make it, the program will have to join on all
1298 * interfaces. On the other hand, multihoming host (or router, but
1299 * not mrouter) cannot join to more than one interface - it will
1300 * result in receiving multiple packets.
1301 */
Patrick McHardy6e23ae22007-11-19 18:53:30 -08001302 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001303 ipmr_forward_finish);
1304 return;
1305
1306out_free:
1307 kfree_skb(skb);
1308 return;
1309}
1310
1311static int ipmr_find_vif(struct net_device *dev)
1312{
1313 int ct;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001314 for (ct = init_net.ipv4.maxvif-1; ct >= 0; ct--) {
1315 if (init_net.ipv4.vif_table[ct].dev == dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001316 break;
1317 }
1318 return ct;
1319}
1320
1321/* "local" means that we should preserve one skb (for local delivery) */
1322
1323static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1324{
1325 int psend = -1;
1326 int vif, ct;
1327
1328 vif = cache->mfc_parent;
1329 cache->mfc_un.res.pkt++;
1330 cache->mfc_un.res.bytes += skb->len;
1331
1332 /*
1333 * Wrong interface: drop packet and (maybe) send PIM assert.
1334 */
Benjamin Therycf958ae32009-01-22 04:56:16 +00001335 if (init_net.ipv4.vif_table[vif].dev != skb->dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336 int true_vifi;
1337
Eric Dumazetee6b9672008-03-05 18:30:47 -08001338 if (skb->rtable->fl.iif == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001339 /* It is our own packet, looped back.
1340 Very complicated situation...
1341
1342 The best workaround until routing daemons will be
1343 fixed is not to redistribute packet, if it was
1344 send through wrong interface. It means, that
1345 multicast applications WILL NOT work for
1346 (S,G), which have default multicast route pointing
1347 to wrong oif. In any case, it is not a good
1348 idea to use multicasting applications on router.
1349 */
1350 goto dont_forward;
1351 }
1352
1353 cache->mfc_un.res.wrong_if++;
1354 true_vifi = ipmr_find_vif(skb->dev);
1355
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001356 if (true_vifi >= 0 && init_net.ipv4.mroute_do_assert &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357 /* pimsm uses asserts, when switching from RPT to SPT,
1358 so that we cannot check that packet arrived on an oif.
1359 It is bad, but otherwise we would need to move pretty
1360 large chunk of pimd to kernel. Ough... --ANK
1361 */
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001362 (init_net.ipv4.mroute_do_pim ||
1363 cache->mfc_un.res.ttls[true_vifi] < 255) &&
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001364 time_after(jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001365 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1366 cache->mfc_un.res.last_assert = jiffies;
1367 ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1368 }
1369 goto dont_forward;
1370 }
1371
Benjamin Therycf958ae32009-01-22 04:56:16 +00001372 init_net.ipv4.vif_table[vif].pkt_in++;
1373 init_net.ipv4.vif_table[vif].bytes_in += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374
1375 /*
1376 * Forward the frame
1377 */
1378 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001379 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 if (psend != -1) {
1381 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1382 if (skb2)
1383 ipmr_queue_xmit(skb2, cache, psend);
1384 }
Jianjun Kongc354e122008-11-03 00:28:02 -08001385 psend = ct;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001386 }
1387 }
1388 if (psend != -1) {
1389 if (local) {
1390 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1391 if (skb2)
1392 ipmr_queue_xmit(skb2, cache, psend);
1393 } else {
1394 ipmr_queue_xmit(skb, cache, psend);
1395 return 0;
1396 }
1397 }
1398
1399dont_forward:
1400 if (!local)
1401 kfree_skb(skb);
1402 return 0;
1403}
1404
1405
1406/*
1407 * Multicast packets for forwarding arrive here
1408 */
1409
1410int ip_mr_input(struct sk_buff *skb)
1411{
1412 struct mfc_cache *cache;
Eric Dumazetee6b9672008-03-05 18:30:47 -08001413 int local = skb->rtable->rt_flags&RTCF_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414
1415 /* Packet is looped back after forward, it should not be
1416 forwarded second time, but still can be delivered locally.
1417 */
1418 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1419 goto dont_forward;
1420
1421 if (!local) {
1422 if (IPCB(skb)->opt.router_alert) {
1423 if (ip_call_ra_chain(skb))
1424 return 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001425 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
Linus Torvalds1da177e2005-04-16 15:20:36 -07001426 /* IGMPv1 (and broken IGMPv2 implementations sort of
1427 Cisco IOS <= 11.2(8)) do not put router alert
1428 option to IGMP packets destined to routable
1429 groups. It is very bad, because it means
1430 that we can forward NO IGMP messages.
1431 */
1432 read_lock(&mrt_lock);
Benjamin Thery70a269e2009-01-22 04:56:15 +00001433 if (init_net.ipv4.mroute_sk) {
Patrick McHardy2715bcf2005-06-21 14:06:24 -07001434 nf_reset(skb);
Benjamin Thery70a269e2009-01-22 04:56:15 +00001435 raw_rcv(init_net.ipv4.mroute_sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001436 read_unlock(&mrt_lock);
1437 return 0;
1438 }
1439 read_unlock(&mrt_lock);
1440 }
1441 }
1442
1443 read_lock(&mrt_lock);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001444 cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001445
1446 /*
1447 * No usable cache entry
1448 */
Jianjun Kongc354e122008-11-03 00:28:02 -08001449 if (cache == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450 int vif;
1451
1452 if (local) {
1453 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1454 ip_local_deliver(skb);
1455 if (skb2 == NULL) {
1456 read_unlock(&mrt_lock);
1457 return -ENOBUFS;
1458 }
1459 skb = skb2;
1460 }
1461
1462 vif = ipmr_find_vif(skb->dev);
1463 if (vif >= 0) {
1464 int err = ipmr_cache_unresolved(vif, skb);
1465 read_unlock(&mrt_lock);
1466
1467 return err;
1468 }
1469 read_unlock(&mrt_lock);
1470 kfree_skb(skb);
1471 return -ENODEV;
1472 }
1473
1474 ip_mr_forward(skb, cache, local);
1475
1476 read_unlock(&mrt_lock);
1477
1478 if (local)
1479 return ip_local_deliver(skb);
1480
1481 return 0;
1482
1483dont_forward:
1484 if (local)
1485 return ip_local_deliver(skb);
1486 kfree_skb(skb);
1487 return 0;
1488}
1489
#ifdef CONFIG_IP_PIMSM
/*
 *	Decapsulate a PIM register message: strip everything in front of
 *	the inner IP header located @pimlen bytes past the transport
 *	header and feed the inner packet back through netif_rx() as if it
 *	had arrived on the register vif's device.
 *
 *	Returns 0 when the skb was handed to netif_rx(), 1 when the
 *	packet was rejected and the skb is still owned by the caller.
 */
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct iphdr *encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	struct net_device *reg_dev = NULL;

	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr))
		return 1;
	if (encap->tot_len == 0)
		return 1;
	if (ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	/* take a reference on the register device while under the lock */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0) {
		reg_dev = init_net.ipv4.vif_table[reg_vif_num].dev;
		if (reg_dev)
			dev_hold(reg_dev);
	}
	read_unlock(&mrt_lock);

	if (!reg_dev)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	skb->dst = NULL;
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif
1536
#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1.  The skb is freed unless __pim_rcv()
 * accepted it; the return value is always 0.
 */

int pim_rcv_v1(struct sk_buff * skb)
{
	struct igmphdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!init_net.ipv4.mroute_do_pim)
		goto drop;
	if (pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim)) == 0)
		return 0;
drop:
	kfree_skb(skb);
	return 0;
}
#endif
1562
#ifdef CONFIG_IP_PIMSM_V2
/*
 * Receive handler for PIMv2 register messages.  The skb is freed
 * unless __pim_rcv() accepted it; the return value is always 0.
 */
static int pim_rcv(struct sk_buff * skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);

	if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)))
		goto drop;
	if (pim->flags & PIM_NULL_REGISTER)
		goto drop;
	/*
	 * Accept the message if either the checksum over the PIM header
	 * alone or the checksum over the whole packet verifies.
	 */
	if (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	    csum_fold(skb_checksum(skb, 0, skb->len, 0)))
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim)) == 0)
		return 0;
drop:
	kfree_skb(skb);
	return 0;
}
#endif
1585
1586static int
1587ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1588{
1589 int ct;
1590 struct rtnexthop *nhp;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001591 struct net_device *dev = init_net.ipv4.vif_table[c->mfc_parent].dev;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001592 u8 *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001593 struct rtattr *mp_head;
1594
1595 if (dev)
1596 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1597
Jianjun Kongc354e122008-11-03 00:28:02 -08001598 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599
1600 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1601 if (c->mfc_un.res.ttls[ct] < 255) {
1602 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1603 goto rtattr_failure;
Jianjun Kongc354e122008-11-03 00:28:02 -08001604 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001605 nhp->rtnh_flags = 0;
1606 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
Benjamin Therycf958ae32009-01-22 04:56:16 +00001607 nhp->rtnh_ifindex = init_net.ipv4.vif_table[ct].dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001608 nhp->rtnh_len = sizeof(*nhp);
1609 }
1610 }
1611 mp_head->rta_type = RTA_MULTIPATH;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001612 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613 rtm->rtm_type = RTN_MULTICAST;
1614 return 1;
1615
1616rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001617 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618 return -EMSGSIZE;
1619}
1620
1621int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1622{
1623 int err;
1624 struct mfc_cache *cache;
Eric Dumazetee6b9672008-03-05 18:30:47 -08001625 struct rtable *rt = skb->rtable;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001626
1627 read_lock(&mrt_lock);
1628 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1629
Jianjun Kongc354e122008-11-03 00:28:02 -08001630 if (cache == NULL) {
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001631 struct sk_buff *skb2;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001632 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633 struct net_device *dev;
1634 int vif;
1635
1636 if (nowait) {
1637 read_unlock(&mrt_lock);
1638 return -EAGAIN;
1639 }
1640
1641 dev = skb->dev;
1642 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1643 read_unlock(&mrt_lock);
1644 return -ENODEV;
1645 }
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001646 skb2 = skb_clone(skb, GFP_ATOMIC);
1647 if (!skb2) {
1648 read_unlock(&mrt_lock);
1649 return -ENOMEM;
1650 }
1651
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -07001652 skb_push(skb2, sizeof(struct iphdr));
1653 skb_reset_network_header(skb2);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001654 iph = ip_hdr(skb2);
1655 iph->ihl = sizeof(struct iphdr) >> 2;
1656 iph->saddr = rt->rt_src;
1657 iph->daddr = rt->rt_dst;
1658 iph->version = 0;
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001659 err = ipmr_cache_unresolved(vif, skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660 read_unlock(&mrt_lock);
1661 return err;
1662 }
1663
1664 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1665 cache->mfc_flags |= MFC_NOTIFY;
1666 err = ipmr_fill_mroute(skb, cache, rtm);
1667 read_unlock(&mrt_lock);
1668 return err;
1669}
1670
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001671#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing: /proc/ip_mr_cache and /proc/ip_mr_vif
 */
/* Per-open iteration state for the vif seq_file. */
struct ipmr_vif_iter {
	int ct;		/* index into the vif table of the current entry */
};
1678
1679static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1680 loff_t pos)
1681{
Benjamin Therycf958ae32009-01-22 04:56:16 +00001682 for (iter->ct = 0; iter->ct < init_net.ipv4.maxvif; ++iter->ct) {
1683 if (!VIF_EXISTS(&init_net, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 continue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001685 if (pos-- == 0)
Benjamin Therycf958ae32009-01-22 04:56:16 +00001686 return &init_net.ipv4.vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687 }
1688 return NULL;
1689}
1690
1691static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001692 __acquires(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001693{
1694 read_lock(&mrt_lock);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001695 return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696 : SEQ_START_TOKEN;
1697}
1698
1699static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1700{
1701 struct ipmr_vif_iter *iter = seq->private;
1702
1703 ++*pos;
1704 if (v == SEQ_START_TOKEN)
1705 return ipmr_vif_seq_idx(iter, 0);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001706
Benjamin Therycf958ae32009-01-22 04:56:16 +00001707 while (++iter->ct < init_net.ipv4.maxvif) {
1708 if (!VIF_EXISTS(&init_net, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709 continue;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001710 return &init_net.ipv4.vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 }
1712 return NULL;
1713}
1714
1715static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001716 __releases(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001717{
1718 read_unlock(&mrt_lock);
1719}
1720
1721static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1722{
1723 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001724 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1726 } else {
1727 const struct vif_device *vif = v;
1728 const char *name = vif->dev ? vif->dev->name : "none";
1729
1730 seq_printf(seq,
1731 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
Benjamin Therycf958ae32009-01-22 04:56:16 +00001732 vif - init_net.ipv4.vif_table,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001733 name, vif->bytes_in, vif->pkt_in,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734 vif->bytes_out, vif->pkt_out,
1735 vif->flags, vif->local, vif->remote);
1736 }
1737 return 0;
1738}
1739
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001740static const struct seq_operations ipmr_vif_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741 .start = ipmr_vif_seq_start,
1742 .next = ipmr_vif_seq_next,
1743 .stop = ipmr_vif_seq_stop,
1744 .show = ipmr_vif_seq_show,
1745};
1746
1747static int ipmr_vif_open(struct inode *inode, struct file *file)
1748{
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001749 return seq_open_private(file, &ipmr_vif_seq_ops,
1750 sizeof(struct ipmr_vif_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001751}
1752
Arjan van de Ven9a321442007-02-12 00:55:35 -08001753static const struct file_operations ipmr_vif_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001754 .owner = THIS_MODULE,
1755 .open = ipmr_vif_open,
1756 .read = seq_read,
1757 .llseek = seq_lseek,
1758 .release = seq_release_private,
1759};
1760
/*
 *	Per-open iteration state for the forwarding cache seq_file.
 */
struct ipmr_mfc_iter {
	/*
	 * List currently being walked: the resolved cache array,
	 * &mfc_unres_queue, or NULL when no list (and no lock) is held.
	 */
	struct mfc_cache **cache;
	int ct;		/* current hash line within the resolved cache array */
};
1765
1766
1767static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1768{
1769 struct mfc_cache *mfc;
1770
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001771 it->cache = init_net.ipv4.mfc_cache_array;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772 read_lock(&mrt_lock);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001773 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001774 for (mfc = init_net.ipv4.mfc_cache_array[it->ct];
1775 mfc; mfc = mfc->next)
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001776 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 return mfc;
1778 read_unlock(&mrt_lock);
1779
1780 it->cache = &mfc_unres_queue;
1781 spin_lock_bh(&mfc_unres_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001782 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 if (pos-- == 0)
1784 return mfc;
1785 spin_unlock_bh(&mfc_unres_lock);
1786
1787 it->cache = NULL;
1788 return NULL;
1789}
1790
1791
1792static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1793{
1794 struct ipmr_mfc_iter *it = seq->private;
1795 it->cache = NULL;
1796 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001797 return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798 : SEQ_START_TOKEN;
1799}
1800
1801static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1802{
1803 struct mfc_cache *mfc = v;
1804 struct ipmr_mfc_iter *it = seq->private;
1805
1806 ++*pos;
1807
1808 if (v == SEQ_START_TOKEN)
1809 return ipmr_mfc_seq_idx(seq->private, 0);
1810
1811 if (mfc->next)
1812 return mfc->next;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001813
1814 if (it->cache == &mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001815 goto end_of_list;
1816
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001817 BUG_ON(it->cache != init_net.ipv4.mfc_cache_array);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001818
1819 while (++it->ct < MFC_LINES) {
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001820 mfc = init_net.ipv4.mfc_cache_array[it->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001821 if (mfc)
1822 return mfc;
1823 }
1824
1825 /* exhausted cache_array, show unresolved */
1826 read_unlock(&mrt_lock);
1827 it->cache = &mfc_unres_queue;
1828 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001829
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830 spin_lock_bh(&mfc_unres_lock);
1831 mfc = mfc_unres_queue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001832 if (mfc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001833 return mfc;
1834
1835 end_of_list:
1836 spin_unlock_bh(&mfc_unres_lock);
1837 it->cache = NULL;
1838
1839 return NULL;
1840}
1841
1842static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1843{
1844 struct ipmr_mfc_iter *it = seq->private;
1845
1846 if (it->cache == &mfc_unres_queue)
1847 spin_unlock_bh(&mfc_unres_lock);
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001848 else if (it->cache == init_net.ipv4.mfc_cache_array)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001849 read_unlock(&mrt_lock);
1850}
1851
1852static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1853{
1854 int n;
1855
1856 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001857 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001858 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1859 } else {
1860 const struct mfc_cache *mfc = v;
1861 const struct ipmr_mfc_iter *it = seq->private;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001862
Benjamin Thery999890b2008-12-03 22:22:16 -08001863 seq_printf(seq, "%08lX %08lX %-3hd",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864 (unsigned long) mfc->mfc_mcastgrp,
1865 (unsigned long) mfc->mfc_origin,
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001866 mfc->mfc_parent);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001867
1868 if (it->cache != &mfc_unres_queue) {
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001869 seq_printf(seq, " %8lu %8lu %8lu",
1870 mfc->mfc_un.res.pkt,
1871 mfc->mfc_un.res.bytes,
1872 mfc->mfc_un.res.wrong_if);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001873 for (n = mfc->mfc_un.res.minvif;
1874 n < mfc->mfc_un.res.maxvif; n++ ) {
Benjamin Therycf958ae32009-01-22 04:56:16 +00001875 if (VIF_EXISTS(&init_net, n) &&
1876 mfc->mfc_un.res.ttls[n] < 255)
1877 seq_printf(seq,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001878 " %2d:%-3d",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879 n, mfc->mfc_un.res.ttls[n]);
1880 }
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001881 } else {
1882 /* unresolved mfc_caches don't contain
1883 * pkt, bytes and wrong_if values
1884 */
1885 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001886 }
1887 seq_putc(seq, '\n');
1888 }
1889 return 0;
1890}
1891
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001892static const struct seq_operations ipmr_mfc_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893 .start = ipmr_mfc_seq_start,
1894 .next = ipmr_mfc_seq_next,
1895 .stop = ipmr_mfc_seq_stop,
1896 .show = ipmr_mfc_seq_show,
1897};
1898
1899static int ipmr_mfc_open(struct inode *inode, struct file *file)
1900{
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001901 return seq_open_private(file, &ipmr_mfc_seq_ops,
1902 sizeof(struct ipmr_mfc_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001903}
1904
Arjan van de Ven9a321442007-02-12 00:55:35 -08001905static const struct file_operations ipmr_mfc_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001906 .owner = THIS_MODULE,
1907 .open = ipmr_mfc_open,
1908 .read = seq_read,
1909 .llseek = seq_lseek,
1910 .release = seq_release_private,
1911};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001912#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913
#ifdef CONFIG_IP_PIMSM_V2
/* Protocol descriptor whose receive handler is pim_rcv(). */
static struct net_protocol pim_protocol = {
	.handler = pim_rcv,
};
#endif
1919
1920
/*
 *	Setup for IP multicast routing
 */
Benjamin Therycf958ae32009-01-22 04:56:16 +00001924static int __net_init ipmr_net_init(struct net *net)
1925{
1926 int err = 0;
1927
1928 net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1929 GFP_KERNEL);
1930 if (!net->ipv4.vif_table) {
1931 err = -ENOMEM;
1932 goto fail;
1933 }
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001934
1935 /* Forwarding cache */
1936 net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1937 sizeof(struct mfc_cache *),
1938 GFP_KERNEL);
1939 if (!net->ipv4.mfc_cache_array) {
1940 err = -ENOMEM;
1941 goto fail_mfc_cache;
1942 }
1943 return 0;
1944
1945fail_mfc_cache:
1946 kfree(net->ipv4.vif_table);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001947fail:
1948 return err;
1949}
1950
1951static void __net_exit ipmr_net_exit(struct net *net)
1952{
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001953 kfree(net->ipv4.mfc_cache_array);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001954 kfree(net->ipv4.vif_table);
1955}
1956
1957static struct pernet_operations ipmr_net_ops = {
1958 .init = ipmr_net_init,
1959 .exit = ipmr_net_exit,
1960};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001961
Wang Chen03d2f892008-07-03 12:13:36 +08001962int __init ip_mr_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001963{
Wang Chen03d2f892008-07-03 12:13:36 +08001964 int err;
1965
Linus Torvalds1da177e2005-04-16 15:20:36 -07001966 mrt_cachep = kmem_cache_create("ip_mrt_cache",
1967 sizeof(struct mfc_cache),
Alexey Dobriyane5d679f2006-08-26 19:25:52 -07001968 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
Paul Mundt20c2df82007-07-20 10:11:58 +09001969 NULL);
Wang Chen03d2f892008-07-03 12:13:36 +08001970 if (!mrt_cachep)
1971 return -ENOMEM;
1972
Benjamin Therycf958ae32009-01-22 04:56:16 +00001973 err = register_pernet_subsys(&ipmr_net_ops);
1974 if (err)
1975 goto reg_pernet_fail;
1976
Pavel Emelyanovb24b8a22008-01-23 21:20:07 -08001977 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
Wang Chen03d2f892008-07-03 12:13:36 +08001978 err = register_netdevice_notifier(&ip_mr_notifier);
1979 if (err)
1980 goto reg_notif_fail;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001981#ifdef CONFIG_PROC_FS
Wang Chen03d2f892008-07-03 12:13:36 +08001982 err = -ENOMEM;
1983 if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
1984 goto proc_vif_fail;
1985 if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1986 goto proc_cache_fail;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001987#endif
Wang Chen03d2f892008-07-03 12:13:36 +08001988 return 0;
Wang Chen03d2f892008-07-03 12:13:36 +08001989#ifdef CONFIG_PROC_FS
Wang Chen03d2f892008-07-03 12:13:36 +08001990proc_cache_fail:
1991 proc_net_remove(&init_net, "ip_mr_vif");
Benjamin Theryc3e38892008-11-19 14:07:41 -08001992proc_vif_fail:
1993 unregister_netdevice_notifier(&ip_mr_notifier);
Wang Chen03d2f892008-07-03 12:13:36 +08001994#endif
Benjamin Theryc3e38892008-11-19 14:07:41 -08001995reg_notif_fail:
1996 del_timer(&ipmr_expire_timer);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001997 unregister_pernet_subsys(&ipmr_net_ops);
1998reg_pernet_fail:
Benjamin Theryc3e38892008-11-19 14:07:41 -08001999 kmem_cache_destroy(mrt_cachep);
Wang Chen03d2f892008-07-03 12:13:36 +08002000 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002001}