blob: fa014d701c1addc87491077a9da31ea932b3cea3 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09006 * Pedro Roque <roque@di.fc.ul.pt>
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
YOSHIFUJI Hideakic0bece92006-08-23 17:23:25 -070025 * Ville Nuorvala
26 * Fixed routing subtrees.
Linus Torvalds1da177e2005-04-16 15:20:36 -070027 */
28
Randy Dunlap4fc268d2006-01-11 12:17:47 -080029#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080043#include <linux/nsproxy.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG()
 * and RT6_TRACE() expand to printk() calls; below that they expand to
 * nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * Compile-time switch tested by ip6_pol_route(): non-zero selects the
 * rt6_alloc_clone() path for routes that already have a nexthop.
 */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
Linus Torvalds1da177e2005-04-16 15:20:36 -070077static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080083static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080090#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080091static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080093 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080095static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080097 struct in6_addr *gwaddr, int ifindex);
98#endif
99
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800100static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101 .family = AF_INET6,
102 .protocol = __constant_htons(ETH_P_IPV6),
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu862b82c2007-11-13 21:43:11 -0800111 .local_out = ip6_local_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112 .entry_size = sizeof(struct rt6_info),
Eric Dumazete2422972008-01-30 20:07:45 -0800113 .entries = ATOMIC_INIT(0),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114};
115
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800116static struct dst_ops *ip6_dst_ops;
117
David S. Miller14e50e52007-05-24 18:17:54 -0700118static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
119{
120}
121
122static struct dst_ops ip6_dst_blackhole_ops = {
123 .family = AF_INET6,
124 .protocol = __constant_htons(ETH_P_IPV6),
125 .destroy = ip6_dst_destroy,
126 .check = ip6_dst_check,
127 .update_pmtu = ip6_rt_blackhole_update_pmtu,
128 .entry_size = sizeof(struct rt6_info),
Eric Dumazete2422972008-01-30 20:07:45 -0800129 .entries = ATOMIC_INIT(0),
David S. Miller14e50e52007-05-24 18:17:54 -0700130};
131
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800132static struct rt6_info ip6_null_entry_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133 .u = {
134 .dst = {
135 .__refcnt = ATOMIC_INIT(1),
136 .__use = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137 .obsolete = -1,
138 .error = -ENETUNREACH,
139 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
140 .input = ip6_pkt_discard,
141 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142 }
143 },
144 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
145 .rt6i_metric = ~(u32) 0,
146 .rt6i_ref = ATOMIC_INIT(1),
147};
148
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" entry: a reject route whose dst reports
 * -EACCES and hands packets to the ip6_pkt_prohibit*() handlers.
 */
struct rt6_info ip6_prohibit_entry_template = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" entry: a reject route whose dst reports
 * -EINVAL and uses dst_discard for both input and output.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= dst_discard,
			.output		= dst_discard,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
189
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190/* allocate dst with ip6_dst_ops */
191static __inline__ struct rt6_info *ip6_dst_alloc(void)
192{
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800193 return (struct rt6_info *)dst_alloc(ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194}
195
196static void ip6_dst_destroy(struct dst_entry *dst)
197{
198 struct rt6_info *rt = (struct rt6_info *)dst;
199 struct inet6_dev *idev = rt->rt6i_idev;
200
201 if (idev != NULL) {
202 rt->rt6i_idev = NULL;
203 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900204 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205}
206
207static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
208 int how)
209{
210 struct rt6_info *rt = (struct rt6_info *)dst;
211 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800212 struct net_device *loopback_dev =
213 dev->nd_net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800215 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
216 struct inet6_dev *loopback_idev =
217 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218 if (loopback_idev != NULL) {
219 rt->rt6i_idev = loopback_idev;
220 in6_dev_put(idev);
221 }
222 }
223}
224
225static __inline__ int rt6_check_expired(const struct rt6_info *rt)
226{
227 return (rt->rt6i_flags & RTF_EXPIRES &&
228 time_after(jiffies, rt->rt6i_expires));
229}
230
Thomas Grafc71099a2006-08-04 23:20:06 -0700231static inline int rt6_need_strict(struct in6_addr *daddr)
232{
233 return (ipv6_addr_type(daddr) &
234 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
235}
236
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700238 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700239 */
240
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800241static inline struct rt6_info *rt6_device_match(struct net *net,
242 struct rt6_info *rt,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 int oif,
244 int strict)
245{
246 struct rt6_info *local = NULL;
247 struct rt6_info *sprt;
248
249 if (oif) {
Eric Dumazet7cc48262007-02-09 16:22:57 -0800250 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251 struct net_device *dev = sprt->rt6i_dev;
252 if (dev->ifindex == oif)
253 return sprt;
254 if (dev->flags & IFF_LOOPBACK) {
255 if (sprt->rt6i_idev == NULL ||
256 sprt->rt6i_idev->dev->ifindex != oif) {
257 if (strict && oif)
258 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900259 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 local->rt6i_idev->dev->ifindex == oif))
261 continue;
262 }
263 local = sprt;
264 }
265 }
266
267 if (local)
268 return local;
269
270 if (strict)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800271 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 }
273 return rt;
274}
275
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the nexthop neighbour of @rt is not
 * in a NUD_VALID state and at least rtr_probe_interval jiffies have
 * passed since neigh->updated, send a Neighbour Solicitation to the
 * neighbour's solicited-node multicast address.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute; neigh->updated serves as the rate-limit timestamp.
 *
 * @rt may be NULL (find_match() passes its current best match, which
 * may not exist yet); in that case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked test first; re-checked under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * neigh->updated is modified here, so take the lock for writing:
	 * with only the read side held, two CPUs could both pass the
	 * rate-limit test and both send a probe.
	 */
	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* The solicitation itself is sent with the neighbour lock dropped. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
311
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800313 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700315static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800317 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700318 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800319 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700320 if ((dev->flags & IFF_LOOPBACK) &&
321 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
322 return 1;
323 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324}
325
Dave Jonesb6f99a22007-03-22 12:27:49 -0700326static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800328 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800329 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700330 if (rt->rt6i_flags & RTF_NONEXTHOP ||
331 !(rt->rt6i_flags & RTF_GATEWAY))
332 m = 1;
333 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800334 read_lock_bh(&neigh->lock);
335 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700336 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800337#ifdef CONFIG_IPV6_ROUTER_PREF
338 else if (neigh->nud_state & NUD_FAILED)
339 m = 0;
340#endif
341 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800342 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800343 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800344 } else
345 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800346 return m;
347}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700348
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800349static int rt6_score_route(struct rt6_info *rt, int oif,
350 int strict)
351{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700352 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900353
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700354 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700355 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800356 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800357#ifdef CONFIG_IPV6_ROUTER_PREF
358 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
359#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700360 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800361 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800362 return -1;
363 return m;
364}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365
David S. Millerf11e6652007-03-24 20:36:25 -0700366static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
367 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800368{
David S. Millerf11e6652007-03-24 20:36:25 -0700369 int m;
370
371 if (rt6_check_expired(rt))
372 goto out;
373
374 m = rt6_score_route(rt, oif, strict);
375 if (m < 0)
376 goto out;
377
378 if (m > *mpri) {
379 if (strict & RT6_LOOKUP_F_REACHABLE)
380 rt6_probe(match);
381 *mpri = m;
382 match = rt;
383 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
384 rt6_probe(rt);
385 }
386
387out:
388 return match;
389}
390
391static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
392 struct rt6_info *rr_head,
393 u32 metric, int oif, int strict)
394{
395 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800396 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397
David S. Millerf11e6652007-03-24 20:36:25 -0700398 match = NULL;
399 for (rt = rr_head; rt && rt->rt6i_metric == metric;
400 rt = rt->u.dst.rt6_next)
401 match = find_match(rt, oif, strict, &mpri, match);
402 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
403 rt = rt->u.dst.rt6_next)
404 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800405
David S. Millerf11e6652007-03-24 20:36:25 -0700406 return match;
407}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800408
David S. Millerf11e6652007-03-24 20:36:25 -0700409static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
410{
411 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800412 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413
David S. Millerf11e6652007-03-24 20:36:25 -0700414 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
415 __FUNCTION__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416
David S. Millerf11e6652007-03-24 20:36:25 -0700417 rt0 = fn->rr_ptr;
418 if (!rt0)
419 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420
David S. Millerf11e6652007-03-24 20:36:25 -0700421 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800423 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700424 (strict & RT6_LOOKUP_F_REACHABLE)) {
425 struct rt6_info *next = rt0->u.dst.rt6_next;
426
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800427 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700428 if (!next || next->rt6i_metric != rt0->rt6i_metric)
429 next = fn->leaf;
430
431 if (next != rt0)
432 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 }
434
David S. Millerf11e6652007-03-24 20:36:25 -0700435 RT6_TRACE("%s() => %p\n",
436 __FUNCTION__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800438 net = rt0->rt6i_dev->nd_net;
439 return (match ? match : net->ipv6.ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440}
441
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option (interpreted as struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the router that sent the option
 *
 * Looks up the route for (prefix, gateway, interface).  A zero lifetime
 * deletes an existing route; a non-zero lifetime adds the route if it
 * is missing, or refreshes preference and expiry if it exists.
 *
 * Returns 0 on success, or -EINVAL if the option is malformed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev->nd_net;
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	/* A negative len must not slip through the unsigned comparison. */
	if (len < 0 || (size_t)len < sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length (RFC 4191 2.3).  Length
	 * counts 8-octet units including the 8-octet fixed part, so:
	 *   length 1 -> no prefix bytes, only valid for prefix_len 0
	 *   length 2 -> 8 prefix bytes,  valid for prefix_len <= 64
	 *   length 3 -> 16 prefix bytes, valid for any prefix_len <= 128
	 * Anything laxer would let the prefix copy below read bytes that
	 * are not part of the option.
	 */
	if (rinfo->length < 1 || rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	/* The option must not claim more bytes than the caller provided. */
	if ((int)rinfo->length * 8 > len)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow in HZ * lifetime below */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3) {
		/* Full 16-byte prefix present: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/*
		 * Shorter option: build a zero-padded copy of the first
		 * prefix_len bits.  The checks above guarantee those bits
		 * lie inside the option.
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		/* Lifetime 0 withdraws the route. */
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/*
		 * NOTE(review): presumably drops the reference obtained
		 * from the get/add helper above - confirm against them.
		 */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
521
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed match.
 *
 * This macro is not self-contained.  The expanding function must have:
 *   - a variable "rt" (struct rt6_info *) holding the last lookup result,
 *   - a variable "fn" (struct fib6_node *) holding the current node,
 *   - a label "restart" that redoes the route selection on "fn",
 *   - a label "out" that is the exit path.
 *
 * If "rt" is the namespace's null entry, "fn" is moved to its parent
 * (or, when the parent has a subtree that is not "fn" itself, to the
 * result of looking up @saddr in that subtree) until either a node
 * carrying route information is reached (goto restart) or the current
 * node is a top-level root (goto out).
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700539
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800540static struct rt6_info *ip6_pol_route_lookup(struct net *net,
541 struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700542 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700543{
544 struct fib6_node *fn;
545 struct rt6_info *rt;
546
Thomas Grafc71099a2006-08-04 23:20:06 -0700547 read_lock_bh(&table->tb6_lock);
548 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
549restart:
550 rt = fn->leaf;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800551 rt = rt6_device_match(net, rt, fl->oif, flags);
552 BACKTRACK(net, &fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700553out:
Pavel Emelyanov03f49f32007-11-10 21:28:34 -0800554 dst_use(&rt->u.dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700555 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700556 return rt;
557
558}
559
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800560struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr,
561 struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700562{
563 struct flowi fl = {
564 .oif = oif,
565 .nl_u = {
566 .ip6_u = {
567 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700568 },
569 },
570 };
571 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700572 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700573
Thomas Grafadaa70b2006-10-13 15:01:03 -0700574 if (saddr) {
575 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
576 flags |= RT6_LOOKUP_F_HAS_SADDR;
577 }
578
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800579 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700580 if (dst->error == 0)
581 return (struct rt6_info *) dst;
582
583 dst_release(dst);
584
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585 return NULL;
586}
587
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900588EXPORT_SYMBOL(rt6_lookup);
589
Thomas Grafc71099a2006-08-04 23:20:06 -0700590/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591 It takes new route entry, the addition fails by any reason the
592 route is freed. In any case, if caller does not hold it, it may
593 be destroyed.
594 */
595
Thomas Graf86872cb2006-08-22 00:01:08 -0700596static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597{
598 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700599 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600
Thomas Grafc71099a2006-08-04 23:20:06 -0700601 table = rt->rt6i_table;
602 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700603 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700604 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605
606 return err;
607}
608
Thomas Graf40e22e82006-08-22 00:00:45 -0700609int ip6_ins_rt(struct rt6_info *rt)
610{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800611 struct nl_info info = {
Daniel Lezcano55786892008-03-04 13:47:47 -0800612 .nl_net = rt->rt6i_dev->nd_net,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800613 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800614 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700615}
616
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800617static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
618 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 struct rt6_info *rt;
621
622 /*
623 * Clone the route.
624 */
625
626 rt = ip6_rt_copy(ort);
627
628 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900629 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
630 if (rt->rt6i_dst.plen != 128 &&
631 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
632 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900634 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900636 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 rt->rt6i_dst.plen = 128;
638 rt->rt6i_flags |= RTF_CACHE;
639 rt->u.dst.flags |= DST_HOST;
640
641#ifdef CONFIG_IPV6_SUBTREES
642 if (rt->rt6i_src.plen && saddr) {
643 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
644 rt->rt6i_src.plen = 128;
645 }
646#endif
647
648 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
649
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800650 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800652 return rt;
653}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800655static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
656{
657 struct rt6_info *rt = ip6_rt_copy(ort);
658 if (rt) {
659 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
660 rt->rt6i_dst.plen = 128;
661 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800662 rt->u.dst.flags |= DST_HOST;
663 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
664 }
665 return rt;
666}
667
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800668static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
669 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670{
671 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800672 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700673 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800675 int err;
YOSHIFUJI Hideakiea659e02006-11-06 09:45:45 -0800676 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700678 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679
680relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700681 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800683restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700684 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685
686restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700687 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800688
689 BACKTRACK(net, &fl->fl6_src);
690 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800691 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef042006-03-20 17:01:24 -0800692 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800694 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700695 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800696
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800697 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800698 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800699 else {
700#if CLONE_OFFLINK_ROUTE
701 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
702#else
703 goto out2;
704#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800706
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800707 dst_release(&rt->u.dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800708 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800709
710 dst_hold(&rt->u.dst);
711 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700712 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800713 if (!err)
714 goto out2;
715 }
716
717 if (--attempts <= 0)
718 goto out2;
719
720 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700721 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800722 * released someone could insert this route. Relookup.
723 */
724 dst_release(&rt->u.dst);
725 goto relookup;
726
727out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800728 if (reachable) {
729 reachable = 0;
730 goto restart_2;
731 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800732 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700733 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734out2:
735 rt->u.dst.lastuse = jiffies;
736 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700737
738 return rt;
739}
740
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800741static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700742 struct flowi *fl, int flags)
743{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800744 return ip6_pol_route(net, table, fl->iif, fl, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700745}
746
Thomas Grafc71099a2006-08-04 23:20:06 -0700747void ip6_route_input(struct sk_buff *skb)
748{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700749 struct ipv6hdr *iph = ipv6_hdr(skb);
Daniel Lezcano55786892008-03-04 13:47:47 -0800750 struct net *net = skb->dev->nd_net;
Thomas Grafadaa70b2006-10-13 15:01:03 -0700751 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700752 struct flowi fl = {
753 .iif = skb->dev->ifindex,
754 .nl_u = {
755 .ip6_u = {
756 .daddr = iph->daddr,
757 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800758 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700759 },
760 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900761 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700762 .proto = iph->nexthdr,
763 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700764
765 if (rt6_need_strict(&iph->daddr))
766 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700767
Daniel Lezcano55786892008-03-04 13:47:47 -0800768 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
Thomas Grafc71099a2006-08-04 23:20:06 -0700769}
770
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800771static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700772 struct flowi *fl, int flags)
773{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800774 return ip6_pol_route(net, table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700775}
776
777struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
778{
779 int flags = 0;
780
781 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700782 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700783
Thomas Grafadaa70b2006-10-13 15:01:03 -0700784 if (!ipv6_addr_any(&fl->fl6_src))
785 flags |= RT6_LOOKUP_F_HAS_SADDR;
786
Daniel Lezcano58f09b72008-03-03 23:25:27 -0800787 return fib6_rule_lookup(&init_net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788}
789
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900790EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791
David S. Miller14e50e52007-05-24 18:17:54 -0700792int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
793{
794 struct rt6_info *ort = (struct rt6_info *) *dstp;
795 struct rt6_info *rt = (struct rt6_info *)
796 dst_alloc(&ip6_dst_blackhole_ops);
797 struct dst_entry *new = NULL;
798
799 if (rt) {
800 new = &rt->u.dst;
801
802 atomic_set(&new->__refcnt, 1);
803 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800804 new->input = dst_discard;
805 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700806
807 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
808 new->dev = ort->u.dst.dev;
809 if (new->dev)
810 dev_hold(new->dev);
811 rt->rt6i_idev = ort->rt6i_idev;
812 if (rt->rt6i_idev)
813 in6_dev_hold(rt->rt6i_idev);
814 rt->rt6i_expires = 0;
815
816 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
817 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
818 rt->rt6i_metric = 0;
819
820 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
821#ifdef CONFIG_IPV6_SUBTREES
822 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
823#endif
824
825 dst_free(new);
826 }
827
828 dst_release(*dstp);
829 *dstp = new;
830 return (new ? 0 : -ENOMEM);
831}
832EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
833
/*
 *	Destination cache support functions
 */
837
838static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
839{
840 struct rt6_info *rt;
841
842 rt = (struct rt6_info *) dst;
843
844 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
845 return dst;
846
847 return NULL;
848}
849
850static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
851{
852 struct rt6_info *rt = (struct rt6_info *) dst;
853
854 if (rt) {
855 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700856 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700857 else
858 dst_release(dst);
859 }
860 return NULL;
861}
862
863static void ip6_link_failure(struct sk_buff *skb)
864{
865 struct rt6_info *rt;
866
867 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
868
869 rt = (struct rt6_info *) skb->dst;
870 if (rt) {
871 if (rt->rt6i_flags&RTF_CACHE) {
872 dst_set_expires(&rt->u.dst, 0);
873 rt->rt6i_flags |= RTF_EXPIRES;
874 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
875 rt->rt6i_node->fn_sernum = -1;
876 }
877}
878
879static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
880{
881 struct rt6_info *rt6 = (struct rt6_info*)dst;
882
883 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
884 rt6->rt6i_flags |= RTF_MODIFIED;
885 if (mtu < IPV6_MIN_MTU) {
886 mtu = IPV6_MIN_MTU;
887 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
888 }
889 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700890 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700891 }
892}
893
Linus Torvalds1da177e2005-04-16 15:20:36 -0700894static int ipv6_get_mtu(struct net_device *dev);
895
Daniel Lezcano55786892008-03-04 13:47:47 -0800896static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897{
898 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
899
Daniel Lezcano55786892008-03-04 13:47:47 -0800900 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
901 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902
903 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900904 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
905 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
906 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907 * rely only on pmtu discovery"
908 */
909 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
910 mtu = IPV6_MAXPLEN;
911 return mtu;
912}
913
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800914static struct dst_entry *icmp6_dst_gc_list;
915static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700916
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800917struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 struct neighbour *neigh,
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800919 struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700920{
921 struct rt6_info *rt;
922 struct inet6_dev *idev = in6_dev_get(dev);
Daniel Lezcano55786892008-03-04 13:47:47 -0800923 struct net *net = dev->nd_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924
925 if (unlikely(idev == NULL))
926 return NULL;
927
928 rt = ip6_dst_alloc();
929 if (unlikely(rt == NULL)) {
930 in6_dev_put(idev);
931 goto out;
932 }
933
934 dev_hold(dev);
935 if (neigh)
936 neigh_hold(neigh);
937 else
938 neigh = ndisc_get_neigh(dev, addr);
939
940 rt->rt6i_dev = dev;
941 rt->rt6i_idev = idev;
942 rt->rt6i_nexthop = neigh;
943 atomic_set(&rt->u.dst.__refcnt, 1);
944 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
945 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -0800946 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800947 rt->u.dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948
949#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900950 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
951 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952 : 0;
953 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
954 rt->rt6i_dst.plen = 128;
955#endif
956
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800957 spin_lock_bh(&icmp6_dst_lock);
958 rt->u.dst.next = icmp6_dst_gc_list;
959 icmp6_dst_gc_list = &rt->u.dst;
960 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961
Daniel Lezcano55786892008-03-04 13:47:47 -0800962 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963
964out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +0900965 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966}
967
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800968int icmp6_dst_gc(int *more)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969{
970 struct dst_entry *dst, *next, **pprev;
971 int freed;
972
973 next = NULL;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900974 freed = 0;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700975
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800976 spin_lock_bh(&icmp6_dst_lock);
977 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700978
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979 while ((dst = *pprev) != NULL) {
980 if (!atomic_read(&dst->__refcnt)) {
981 *pprev = dst->next;
982 dst_free(dst);
983 freed++;
984 } else {
985 pprev = &dst->next;
986 (*more)++;
987 }
988 }
989
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800990 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700991
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992 return freed;
993}
994
Daniel Lezcano569d3642008-01-18 03:56:57 -0800995static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996{
997 static unsigned expire = 30*HZ;
998 static unsigned long last_gc;
999 unsigned long now = jiffies;
1000
Daniel Lezcano49905092008-01-10 03:01:01 -08001001 if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08001002 atomic_read(&ip6_dst_ops->entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003 goto out;
1004
1005 expire++;
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08001006 fib6_run_gc(expire, &init_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007 last_gc = now;
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08001008 if (atomic_read(&ip6_dst_ops->entries) < ip6_dst_ops->gc_thresh)
Daniel Lezcano49905092008-01-10 03:01:01 -08001009 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010
1011out:
Daniel Lezcano49905092008-01-10 03:01:01 -08001012 expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08001013 return (atomic_read(&ip6_dst_ops->entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014}
1015
/* Clean host part of a prefix. Not necessary in radix tree,
   but results in cleaner routing tables.

   Remove it only when all the things will work!
 */
1021
1022static int ipv6_get_mtu(struct net_device *dev)
1023{
1024 int mtu = IPV6_MIN_MTU;
1025 struct inet6_dev *idev;
1026
1027 idev = in6_dev_get(dev);
1028 if (idev) {
1029 mtu = idev->cnf.mtu6;
1030 in6_dev_put(idev);
1031 }
1032 return mtu;
1033}
1034
1035int ipv6_get_hoplimit(struct net_device *dev)
1036{
1037 int hoplimit = ipv6_devconf.hop_limit;
1038 struct inet6_dev *idev;
1039
1040 idev = in6_dev_get(dev);
1041 if (idev) {
1042 hoplimit = idev->cnf.hop_limit;
1043 in6_dev_put(idev);
1044 }
1045 return hoplimit;
1046}
1047
1048/*
1049 *
1050 */
1051
Thomas Graf86872cb2006-08-22 00:01:08 -07001052int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053{
1054 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001055 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056 struct rt6_info *rt = NULL;
1057 struct net_device *dev = NULL;
1058 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001059 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060 int addr_type;
1061
Thomas Graf86872cb2006-08-22 00:01:08 -07001062 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063 return -EINVAL;
1064#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001065 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001066 return -EINVAL;
1067#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001068 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001070 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071 if (!dev)
1072 goto out;
1073 idev = in6_dev_get(dev);
1074 if (!idev)
1075 goto out;
1076 }
1077
Thomas Graf86872cb2006-08-22 00:01:08 -07001078 if (cfg->fc_metric == 0)
1079 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080
Daniel Lezcano55786892008-03-04 13:47:47 -08001081 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001082 if (table == NULL) {
1083 err = -ENOBUFS;
1084 goto out;
1085 }
1086
Linus Torvalds1da177e2005-04-16 15:20:36 -07001087 rt = ip6_dst_alloc();
1088
1089 if (rt == NULL) {
1090 err = -ENOMEM;
1091 goto out;
1092 }
1093
1094 rt->u.dst.obsolete = -1;
Thomas Graf86872cb2006-08-22 00:01:08 -07001095 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001096
Thomas Graf86872cb2006-08-22 00:01:08 -07001097 if (cfg->fc_protocol == RTPROT_UNSPEC)
1098 cfg->fc_protocol = RTPROT_BOOT;
1099 rt->rt6i_protocol = cfg->fc_protocol;
1100
1101 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102
1103 if (addr_type & IPV6_ADDR_MULTICAST)
1104 rt->u.dst.input = ip6_mc_input;
1105 else
1106 rt->u.dst.input = ip6_forward;
1107
1108 rt->u.dst.output = ip6_output;
1109
Thomas Graf86872cb2006-08-22 00:01:08 -07001110 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1111 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112 if (rt->rt6i_dst.plen == 128)
1113 rt->u.dst.flags = DST_HOST;
1114
1115#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001116 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1117 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118#endif
1119
Thomas Graf86872cb2006-08-22 00:01:08 -07001120 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121
1122 /* We cannot add true routes via loopback here,
1123 they would result in kernel looping; promote them to reject routes
1124 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001125 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1127 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001128 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129 if (dev) {
1130 dev_put(dev);
1131 in6_dev_put(idev);
1132 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001133 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001134 dev_hold(dev);
1135 idev = in6_dev_get(dev);
1136 if (!idev) {
1137 err = -ENODEV;
1138 goto out;
1139 }
1140 }
1141 rt->u.dst.output = ip6_pkt_discard_out;
1142 rt->u.dst.input = ip6_pkt_discard;
1143 rt->u.dst.error = -ENETUNREACH;
1144 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1145 goto install_route;
1146 }
1147
Thomas Graf86872cb2006-08-22 00:01:08 -07001148 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149 struct in6_addr *gw_addr;
1150 int gwa_type;
1151
Thomas Graf86872cb2006-08-22 00:01:08 -07001152 gw_addr = &cfg->fc_gateway;
1153 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154 gwa_type = ipv6_addr_type(gw_addr);
1155
1156 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1157 struct rt6_info *grt;
1158
1159 /* IPv6 strictly inhibits using not link-local
1160 addresses as nexthop address.
1161 Otherwise, router will not able to send redirects.
1162 It is very good, but in some (rare!) circumstances
1163 (SIT, PtP, NBMA NOARP links) it is handy to allow
1164 some exceptions. --ANK
1165 */
1166 err = -EINVAL;
1167 if (!(gwa_type&IPV6_ADDR_UNICAST))
1168 goto out;
1169
Daniel Lezcano55786892008-03-04 13:47:47 -08001170 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171
1172 err = -EHOSTUNREACH;
1173 if (grt == NULL)
1174 goto out;
1175 if (dev) {
1176 if (dev != grt->rt6i_dev) {
1177 dst_release(&grt->u.dst);
1178 goto out;
1179 }
1180 } else {
1181 dev = grt->rt6i_dev;
1182 idev = grt->rt6i_idev;
1183 dev_hold(dev);
1184 in6_dev_hold(grt->rt6i_idev);
1185 }
1186 if (!(grt->rt6i_flags&RTF_GATEWAY))
1187 err = 0;
1188 dst_release(&grt->u.dst);
1189
1190 if (err)
1191 goto out;
1192 }
1193 err = -EINVAL;
1194 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1195 goto out;
1196 }
1197
1198 err = -ENODEV;
1199 if (dev == NULL)
1200 goto out;
1201
Thomas Graf86872cb2006-08-22 00:01:08 -07001202 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001203 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1204 if (IS_ERR(rt->rt6i_nexthop)) {
1205 err = PTR_ERR(rt->rt6i_nexthop);
1206 rt->rt6i_nexthop = NULL;
1207 goto out;
1208 }
1209 }
1210
Thomas Graf86872cb2006-08-22 00:01:08 -07001211 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212
1213install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001214 if (cfg->fc_mx) {
1215 struct nlattr *nla;
1216 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217
Thomas Graf86872cb2006-08-22 00:01:08 -07001218 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001219 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001220
1221 if (type) {
1222 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223 err = -EINVAL;
1224 goto out;
1225 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001226
1227 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229 }
1230 }
1231
1232 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1233 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1234 if (!rt->u.dst.metrics[RTAX_MTU-1])
1235 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1236 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
Daniel Lezcano55786892008-03-04 13:47:47 -08001237 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001238 rt->u.dst.dev = dev;
1239 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001240 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001241
1242 cfg->fc_nlinfo.nl_net = dev->nd_net;
1243
Thomas Graf86872cb2006-08-22 00:01:08 -07001244 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245
1246out:
1247 if (dev)
1248 dev_put(dev);
1249 if (idev)
1250 in6_dev_put(idev);
1251 if (rt)
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001252 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253 return err;
1254}
1255
Thomas Graf86872cb2006-08-22 00:01:08 -07001256static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257{
1258 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001259 struct fib6_table *table;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001260 struct net *net = rt->rt6i_dev->nd_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001261
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001262 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001263 return -ENOENT;
1264
Thomas Grafc71099a2006-08-04 23:20:06 -07001265 table = rt->rt6i_table;
1266 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267
Thomas Graf86872cb2006-08-22 00:01:08 -07001268 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001269 dst_release(&rt->u.dst);
1270
Thomas Grafc71099a2006-08-04 23:20:06 -07001271 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272
1273 return err;
1274}
1275
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001276int ip6_del_rt(struct rt6_info *rt)
1277{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001278 struct nl_info info = {
Daniel Lezcano55786892008-03-04 13:47:47 -08001279 .nl_net = rt->rt6i_dev->nd_net,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001280 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001281 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001282}
1283
Thomas Graf86872cb2006-08-22 00:01:08 -07001284static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285{
Thomas Grafc71099a2006-08-04 23:20:06 -07001286 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001287 struct fib6_node *fn;
1288 struct rt6_info *rt;
1289 int err = -ESRCH;
1290
Daniel Lezcano55786892008-03-04 13:47:47 -08001291 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001292 if (table == NULL)
1293 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294
Thomas Grafc71099a2006-08-04 23:20:06 -07001295 read_lock_bh(&table->tb6_lock);
1296
1297 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001298 &cfg->fc_dst, cfg->fc_dst_len,
1299 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001300
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001302 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001303 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001304 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001305 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001306 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001307 if (cfg->fc_flags & RTF_GATEWAY &&
1308 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001310 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001311 continue;
1312 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001313 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001314
Thomas Graf86872cb2006-08-22 00:01:08 -07001315 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001316 }
1317 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001318 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001319
1320 return err;
1321}
1322
/*
 *	Handle redirects
 */
/*
 * Lookup key used while validating a redirect: an ordinary flow plus
 * the gateway address the candidate routes are compared against.
 *
 * It is handed to fib6_rule_lookup() cast to a struct flowi pointer
 * and cast back in __ip6_route_redirect(), so @fl must remain the
 * first member.
 */
struct ip6rd_flowi {
	struct flowi fl;
	struct in6_addr gateway;
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001331static struct rt6_info *__ip6_route_redirect(struct net *net,
1332 struct fib6_table *table,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001333 struct flowi *fl,
1334 int flags)
1335{
1336 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1337 struct rt6_info *rt;
1338 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001339
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001341 * Get the "current" route for this destination and
1342 * check if the redirect has come from approriate router.
1343 *
1344 * RFC 2461 specifies that redirects should only be
1345 * accepted if they come from the nexthop to the target.
1346 * Due to the way the routes are chosen, this notion
1347 * is a bit fuzzy and one might need to check all possible
1348 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001350
Thomas Grafc71099a2006-08-04 23:20:06 -07001351 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001352 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001353restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001354 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001355 /*
1356 * Current route is on-link; redirect is always invalid.
1357 *
1358 * Seems, previous statement is not true. It could
1359 * be node, which looks for us as on-link (f.e. proxy ndisc)
1360 * But then router serving it might decide, that we should
1361 * know truth 8)8) --ANK (980726).
1362 */
1363 if (rt6_check_expired(rt))
1364 continue;
1365 if (!(rt->rt6i_flags & RTF_GATEWAY))
1366 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001367 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001368 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001369 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001370 continue;
1371 break;
1372 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001373
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001374 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001375 rt = net->ipv6.ip6_null_entry;
1376 BACKTRACK(net, &fl->fl6_src);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001377out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001378 dst_hold(&rt->u.dst);
1379
1380 read_unlock_bh(&table->tb6_lock);
1381
1382 return rt;
1383};
1384
1385static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1386 struct in6_addr *src,
1387 struct in6_addr *gateway,
1388 struct net_device *dev)
1389{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001390 int flags = RT6_LOOKUP_F_HAS_SADDR;
Daniel Lezcano55786892008-03-04 13:47:47 -08001391 struct net *net = dev->nd_net;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001392 struct ip6rd_flowi rdfl = {
1393 .fl = {
1394 .oif = dev->ifindex,
1395 .nl_u = {
1396 .ip6_u = {
1397 .daddr = *dest,
1398 .saddr = *src,
1399 },
1400 },
1401 },
1402 .gateway = *gateway,
1403 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001404
1405 if (rt6_need_strict(dest))
1406 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001407
Daniel Lezcano55786892008-03-04 13:47:47 -08001408 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001409 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001410}
1411
1412void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1413 struct in6_addr *saddr,
1414 struct neighbour *neigh, u8 *lladdr, int on_link)
1415{
1416 struct rt6_info *rt, *nrt = NULL;
1417 struct netevent_redirect netevent;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001418 struct net *net = neigh->dev->nd_net;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001419
1420 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1421
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001422 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001423 if (net_ratelimit())
1424 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1425 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001426 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427 }
1428
Linus Torvalds1da177e2005-04-16 15:20:36 -07001429 /*
1430 * We have finally decided to accept it.
1431 */
1432
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001433 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001434 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1435 NEIGH_UPDATE_F_OVERRIDE|
1436 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1437 NEIGH_UPDATE_F_ISROUTER))
1438 );
1439
1440 /*
1441 * Redirect received -> path was valid.
1442 * Look, redirects are sent only in response to data packets,
1443 * so that this nexthop apparently is reachable. --ANK
1444 */
1445 dst_confirm(&rt->u.dst);
1446
1447 /* Duplicate redirect: silently ignore. */
1448 if (neigh == rt->u.dst.neighbour)
1449 goto out;
1450
1451 nrt = ip6_rt_copy(rt);
1452 if (nrt == NULL)
1453 goto out;
1454
1455 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1456 if (on_link)
1457 nrt->rt6i_flags &= ~RTF_GATEWAY;
1458
1459 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1460 nrt->rt6i_dst.plen = 128;
1461 nrt->u.dst.flags |= DST_HOST;
1462
1463 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1464 nrt->rt6i_nexthop = neigh_clone(neigh);
1465 /* Reset pmtu, it may be better */
1466 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
Daniel Lezcano55786892008-03-04 13:47:47 -08001467 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(neigh->dev->nd_net,
1468 dst_mtu(&nrt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001469
Thomas Graf40e22e82006-08-22 00:00:45 -07001470 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471 goto out;
1472
Tom Tucker8d717402006-07-30 20:43:36 -07001473 netevent.old = &rt->u.dst;
1474 netevent.new = &nrt->u.dst;
1475 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1476
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001478 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479 return;
1480 }
1481
1482out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001483 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484 return;
1485}
1486
/*
 *	Handle ICMP "packet too big" messages
 *	i.e. Path MTU discovery
 */
1491
1492void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1493 struct net_device *dev, u32 pmtu)
1494{
1495 struct rt6_info *rt, *nrt;
Daniel Lezcano55786892008-03-04 13:47:47 -08001496 struct net *net = dev->nd_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001497 int allfrag = 0;
1498
Daniel Lezcano55786892008-03-04 13:47:47 -08001499 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500 if (rt == NULL)
1501 return;
1502
1503 if (pmtu >= dst_mtu(&rt->u.dst))
1504 goto out;
1505
1506 if (pmtu < IPV6_MIN_MTU) {
1507 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001508 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001509 * MTU (1280) and a fragment header should always be included
1510 * after a node receiving Too Big message reporting PMTU is
1511 * less than the IPv6 Minimum Link MTU.
1512 */
1513 pmtu = IPV6_MIN_MTU;
1514 allfrag = 1;
1515 }
1516
1517 /* New mtu received -> path was valid.
1518 They are sent only in response to data packets,
1519 so that this nexthop apparently is reachable. --ANK
1520 */
1521 dst_confirm(&rt->u.dst);
1522
1523 /* Host route. If it is static, it would be better
1524 not to override it, but add new one, so that
1525 when cache entry will expire old pmtu
1526 would return automatically.
1527 */
1528 if (rt->rt6i_flags & RTF_CACHE) {
1529 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1530 if (allfrag)
1531 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
Daniel Lezcano55786892008-03-04 13:47:47 -08001532 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001533 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1534 goto out;
1535 }
1536
1537 /* Network route.
1538 Two cases are possible:
1539 1. It is connected route. Action: COW
1540 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1541 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001542 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001543 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001544 else
1545 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001546
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001547 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001548 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1549 if (allfrag)
1550 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1551
1552 /* According to RFC 1981, detecting PMTU increase shouldn't be
1553 * happened within 5 mins, the recommended timer is 10 mins.
1554 * Here this route expiration time is set to ip6_rt_mtu_expires
1555 * which is 10 mins. After 10 mins the decreased pmtu is expired
1556 * and detecting PMTU increase will be automatically happened.
1557 */
Daniel Lezcano55786892008-03-04 13:47:47 -08001558 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001559 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1560
Thomas Graf40e22e82006-08-22 00:00:45 -07001561 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001562 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001563out:
1564 dst_release(&rt->u.dst);
1565}
1566
1567/*
1568 * Misc support functions
1569 */
1570
1571static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1572{
1573 struct rt6_info *rt = ip6_dst_alloc();
1574
1575 if (rt) {
1576 rt->u.dst.input = ort->u.dst.input;
1577 rt->u.dst.output = ort->u.dst.output;
1578
1579 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001580 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581 rt->u.dst.dev = ort->u.dst.dev;
1582 if (rt->u.dst.dev)
1583 dev_hold(rt->u.dst.dev);
1584 rt->rt6i_idev = ort->rt6i_idev;
1585 if (rt->rt6i_idev)
1586 in6_dev_hold(rt->rt6i_idev);
1587 rt->u.dst.lastuse = jiffies;
1588 rt->rt6i_expires = 0;
1589
1590 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1591 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1592 rt->rt6i_metric = 0;
1593
1594 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1595#ifdef CONFIG_IPV6_SUBTREES
1596 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1597#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001598 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599 }
1600 return rt;
1601}
1602
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001603#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001604static struct rt6_info *rt6_get_route_info(struct net *net,
1605 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001606 struct in6_addr *gwaddr, int ifindex)
1607{
1608 struct fib6_node *fn;
1609 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001610 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001611
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001612 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001613 if (table == NULL)
1614 return NULL;
1615
1616 write_lock_bh(&table->tb6_lock);
1617 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001618 if (!fn)
1619 goto out;
1620
Eric Dumazet7cc48262007-02-09 16:22:57 -08001621 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001622 if (rt->rt6i_dev->ifindex != ifindex)
1623 continue;
1624 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1625 continue;
1626 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1627 continue;
1628 dst_hold(&rt->u.dst);
1629 break;
1630 }
1631out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001632 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001633 return rt;
1634}
1635
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001636static struct rt6_info *rt6_add_route_info(struct net *net,
1637 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001638 struct in6_addr *gwaddr, int ifindex,
1639 unsigned pref)
1640{
Thomas Graf86872cb2006-08-22 00:01:08 -07001641 struct fib6_config cfg = {
1642 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001643 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001644 .fc_ifindex = ifindex,
1645 .fc_dst_len = prefixlen,
1646 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1647 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001648 .fc_nlinfo.pid = 0,
1649 .fc_nlinfo.nlh = NULL,
1650 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001651 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001652
Thomas Graf86872cb2006-08-22 00:01:08 -07001653 ipv6_addr_copy(&cfg.fc_dst, prefix);
1654 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1655
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001656 /* We should treat it as a default route if prefix length is 0. */
1657 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001658 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001659
Thomas Graf86872cb2006-08-22 00:01:08 -07001660 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001661
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001662 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001663}
1664#endif
1665
Linus Torvalds1da177e2005-04-16 15:20:36 -07001666struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001667{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001669 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670
Daniel Lezcano55786892008-03-04 13:47:47 -08001671 table = fib6_get_table(dev->nd_net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001672 if (table == NULL)
1673 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674
Thomas Grafc71099a2006-08-04 23:20:06 -07001675 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001676 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001677 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001678 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1680 break;
1681 }
1682 if (rt)
1683 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001684 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685 return rt;
1686}
1687
Fred L. Templinc7dc89c2007-11-29 22:11:40 +11001688EXPORT_SYMBOL(rt6_get_dflt_router);
1689
Linus Torvalds1da177e2005-04-16 15:20:36 -07001690struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001691 struct net_device *dev,
1692 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001693{
Thomas Graf86872cb2006-08-22 00:01:08 -07001694 struct fib6_config cfg = {
1695 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001696 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001697 .fc_ifindex = dev->ifindex,
1698 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1699 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001700 .fc_nlinfo.pid = 0,
1701 .fc_nlinfo.nlh = NULL,
1702 .fc_nlinfo.nl_net = dev->nd_net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001703 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704
Thomas Graf86872cb2006-08-22 00:01:08 -07001705 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706
Thomas Graf86872cb2006-08-22 00:01:08 -07001707 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709 return rt6_get_dflt_router(gwaddr, dev);
1710}
1711
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001712void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713{
1714 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001715 struct fib6_table *table;
1716
1717 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001718 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001719 if (table == NULL)
1720 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721
1722restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001723 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001724 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1726 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001727 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001728 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001729 goto restart;
1730 }
1731 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001732 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733}
1734
Daniel Lezcano55786892008-03-04 13:47:47 -08001735static void rtmsg_to_fib6_config(struct net *net,
1736 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001737 struct fib6_config *cfg)
1738{
1739 memset(cfg, 0, sizeof(*cfg));
1740
1741 cfg->fc_table = RT6_TABLE_MAIN;
1742 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1743 cfg->fc_metric = rtmsg->rtmsg_metric;
1744 cfg->fc_expires = rtmsg->rtmsg_info;
1745 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1746 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1747 cfg->fc_flags = rtmsg->rtmsg_flags;
1748
Daniel Lezcano55786892008-03-04 13:47:47 -08001749 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001750
Thomas Graf86872cb2006-08-22 00:01:08 -07001751 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1752 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1753 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1754}
1755
Daniel Lezcano55786892008-03-04 13:47:47 -08001756int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757{
Thomas Graf86872cb2006-08-22 00:01:08 -07001758 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001759 struct in6_rtmsg rtmsg;
1760 int err;
1761
1762 switch(cmd) {
1763 case SIOCADDRT: /* Add a route */
1764 case SIOCDELRT: /* Delete a route */
1765 if (!capable(CAP_NET_ADMIN))
1766 return -EPERM;
1767 err = copy_from_user(&rtmsg, arg,
1768 sizeof(struct in6_rtmsg));
1769 if (err)
1770 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001771
Daniel Lezcano55786892008-03-04 13:47:47 -08001772 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001773
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774 rtnl_lock();
1775 switch (cmd) {
1776 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001777 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001778 break;
1779 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001780 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781 break;
1782 default:
1783 err = -EINVAL;
1784 }
1785 rtnl_unlock();
1786
1787 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001788 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789
1790 return -EINVAL;
1791}
1792
1793/*
1794 * Drop the packet on the floor
1795 */
1796
Ilpo Järvinen50eb4312008-01-12 03:21:00 -08001797static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001799 int type;
1800 switch (ipstats_mib_noroutes) {
1801 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001802 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001803 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1804 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1805 break;
1806 }
1807 /* FALLTHROUGH */
1808 case IPSTATS_MIB_OUTNOROUTES:
1809 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1810 break;
1811 }
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001812 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813 kfree_skb(skb);
1814 return 0;
1815}
1816
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001817static int ip6_pkt_discard(struct sk_buff *skb)
1818{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001819 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001820}
1821
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001822static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823{
1824 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001825 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826}
1827
David S. Miller6723ab52006-10-18 21:20:57 -07001828#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1829
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001830static int ip6_pkt_prohibit(struct sk_buff *skb)
1831{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001832 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001833}
1834
1835static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1836{
1837 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001838 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001839}
1840
David S. Miller6723ab52006-10-18 21:20:57 -07001841#endif
1842
Linus Torvalds1da177e2005-04-16 15:20:36 -07001843/*
1844 * Allocate a dst for local (unicast / anycast) address.
1845 */
1846
1847struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1848 const struct in6_addr *addr,
1849 int anycast)
1850{
Daniel Lezcano55786892008-03-04 13:47:47 -08001851 struct net *net = idev->dev->nd_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001852 struct rt6_info *rt = ip6_dst_alloc();
1853
1854 if (rt == NULL)
1855 return ERR_PTR(-ENOMEM);
1856
Daniel Lezcano55786892008-03-04 13:47:47 -08001857 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001858 in6_dev_hold(idev);
1859
1860 rt->u.dst.flags = DST_HOST;
1861 rt->u.dst.input = ip6_input;
1862 rt->u.dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08001863 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864 rt->rt6i_idev = idev;
1865 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -08001866 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001867 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1868 rt->u.dst.obsolete = -1;
1869
1870 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001871 if (anycast)
1872 rt->rt6i_flags |= RTF_ANYCAST;
1873 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001874 rt->rt6i_flags |= RTF_LOCAL;
1875 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1876 if (rt->rt6i_nexthop == NULL) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001877 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001878 return ERR_PTR(-ENOMEM);
1879 }
1880
1881 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1882 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08001883 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001884
1885 atomic_set(&rt->u.dst.__refcnt, 1);
1886
1887 return rt;
1888}
1889
/* Argument bundle passed by rt6_ifdown() to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches any device */
	struct net		*net;	/* namespace whose null entry must be kept */
};
1894
Linus Torvalds1da177e2005-04-16 15:20:36 -07001895static int fib6_ifdown(struct rt6_info *rt, void *arg)
1896{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001897 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1898 struct net *net = ((struct arg_dev_net *)arg)->net;
1899
1900 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1901 rt != net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001902 RT6_TRACE("deleted by ifdown %p\n", rt);
1903 return -1;
1904 }
1905 return 0;
1906}
1907
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001908void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001909{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001910 struct arg_dev_net adn = {
1911 .dev = dev,
1912 .net = net,
1913 };
1914
1915 fib6_clean_all(net, fib6_ifdown, 0, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001916}
1917
/* Argument bundle passed by rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* the new MTU */
};
1923
1924static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1925{
1926 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1927 struct inet6_dev *idev;
Daniel Lezcano55786892008-03-04 13:47:47 -08001928 struct net *net = arg->dev->nd_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001929
1930 /* In IPv6 pmtu discovery is not optional,
1931 so that RTAX_MTU lock cannot disable it.
1932 We still use this lock to block changes
1933 caused by addrconf/ndisc.
1934 */
1935
1936 idev = __in6_dev_get(arg->dev);
1937 if (idev == NULL)
1938 return 0;
1939
1940 /* For administrative MTU increase, there is no way to discover
1941 IPv6 PMTU increase, so PMTU increase should be updated here.
1942 Since RFC 1981 doesn't include administrative MTU increase
1943 update PMTU increase is a MUST. (i.e. jumbo frame)
1944 */
1945 /*
1946 If new MTU is less than route PMTU, this new MTU will be the
1947 lowest MTU in the path, update the route PMTU to reflect PMTU
1948 decreases; if new MTU is greater than route PMTU, and the
1949 old MTU is the lowest MTU in the path, update the route PMTU
1950 to reflect the increase. In this case if the other nodes' MTU
1951 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1952 PMTU discouvery.
1953 */
1954 if (rt->rt6i_dev == arg->dev &&
1955 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
Jim Paris23717792008-01-31 16:36:25 -08001956 (dst_mtu(&rt->u.dst) >= arg->mtu ||
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001957 (dst_mtu(&rt->u.dst) < arg->mtu &&
Simon Arlott566cfd82007-07-26 00:09:55 -07001958 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001959 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
Daniel Lezcano55786892008-03-04 13:47:47 -08001960 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07001961 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001962 return 0;
1963}
1964
1965void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1966{
Thomas Grafc71099a2006-08-04 23:20:06 -07001967 struct rt6_mtu_change_arg arg = {
1968 .dev = dev,
1969 .mtu = mtu,
1970 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001971
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001972 fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973}
1974
Patrick McHardyef7c79e2007-06-05 12:38:30 -07001975static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07001976 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07001977 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07001978 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07001979 [RTA_PRIORITY] = { .type = NLA_U32 },
1980 [RTA_METRICS] = { .type = NLA_NESTED },
1981};
1982
1983static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1984 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985{
Thomas Graf86872cb2006-08-22 00:01:08 -07001986 struct rtmsg *rtm;
1987 struct nlattr *tb[RTA_MAX+1];
1988 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001989
Thomas Graf86872cb2006-08-22 00:01:08 -07001990 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1991 if (err < 0)
1992 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001993
Thomas Graf86872cb2006-08-22 00:01:08 -07001994 err = -EINVAL;
1995 rtm = nlmsg_data(nlh);
1996 memset(cfg, 0, sizeof(*cfg));
1997
1998 cfg->fc_table = rtm->rtm_table;
1999 cfg->fc_dst_len = rtm->rtm_dst_len;
2000 cfg->fc_src_len = rtm->rtm_src_len;
2001 cfg->fc_flags = RTF_UP;
2002 cfg->fc_protocol = rtm->rtm_protocol;
2003
2004 if (rtm->rtm_type == RTN_UNREACHABLE)
2005 cfg->fc_flags |= RTF_REJECT;
2006
2007 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2008 cfg->fc_nlinfo.nlh = nlh;
Benjamin Thery2216b482008-01-30 19:09:35 -08002009 cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
Thomas Graf86872cb2006-08-22 00:01:08 -07002010
2011 if (tb[RTA_GATEWAY]) {
2012 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2013 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002015
2016 if (tb[RTA_DST]) {
2017 int plen = (rtm->rtm_dst_len + 7) >> 3;
2018
2019 if (nla_len(tb[RTA_DST]) < plen)
2020 goto errout;
2021
2022 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002023 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002024
2025 if (tb[RTA_SRC]) {
2026 int plen = (rtm->rtm_src_len + 7) >> 3;
2027
2028 if (nla_len(tb[RTA_SRC]) < plen)
2029 goto errout;
2030
2031 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002032 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002033
2034 if (tb[RTA_OIF])
2035 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2036
2037 if (tb[RTA_PRIORITY])
2038 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2039
2040 if (tb[RTA_METRICS]) {
2041 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2042 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002044
2045 if (tb[RTA_TABLE])
2046 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2047
2048 err = 0;
2049errout:
2050 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051}
2052
Thomas Grafc127ea22007-03-22 11:58:32 -07002053static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002054{
Thomas Graf86872cb2006-08-22 00:01:08 -07002055 struct fib6_config cfg;
2056 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002057
Thomas Graf86872cb2006-08-22 00:01:08 -07002058 err = rtm_to_fib6_config(skb, nlh, &cfg);
2059 if (err < 0)
2060 return err;
2061
2062 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002063}
2064
Thomas Grafc127ea22007-03-22 11:58:32 -07002065static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002066{
Thomas Graf86872cb2006-08-22 00:01:08 -07002067 struct fib6_config cfg;
2068 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002069
Thomas Graf86872cb2006-08-22 00:01:08 -07002070 err = rtm_to_fib6_config(skb, nlh, &cfg);
2071 if (err < 0)
2072 return err;
2073
2074 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002075}
2076
Thomas Graf339bf982006-11-10 14:10:15 -08002077static inline size_t rt6_nlmsg_size(void)
2078{
2079 return NLMSG_ALIGN(sizeof(struct rtmsg))
2080 + nla_total_size(16) /* RTA_SRC */
2081 + nla_total_size(16) /* RTA_DST */
2082 + nla_total_size(16) /* RTA_GATEWAY */
2083 + nla_total_size(16) /* RTA_PREFSRC */
2084 + nla_total_size(4) /* RTA_TABLE */
2085 + nla_total_size(4) /* RTA_IIF */
2086 + nla_total_size(4) /* RTA_OIF */
2087 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002088 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002089 + nla_total_size(sizeof(struct rta_cacheinfo));
2090}
2091
Linus Torvalds1da177e2005-04-16 15:20:36 -07002092static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002093 struct in6_addr *dst, struct in6_addr *src,
2094 int iif, int type, u32 pid, u32 seq,
2095 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002096{
2097 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002098 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002099 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002100 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002101
2102 if (prefix) { /* user wants prefix routes only */
2103 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2104 /* success since this is not a prefix route */
2105 return 1;
2106 }
2107 }
2108
Thomas Graf2d7202b2006-08-22 00:01:27 -07002109 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2110 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002111 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002112
2113 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002114 rtm->rtm_family = AF_INET6;
2115 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2116 rtm->rtm_src_len = rt->rt6i_src.plen;
2117 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002118 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002119 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002120 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002121 table = RT6_TABLE_UNSPEC;
2122 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002123 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002124 if (rt->rt6i_flags&RTF_REJECT)
2125 rtm->rtm_type = RTN_UNREACHABLE;
2126 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2127 rtm->rtm_type = RTN_LOCAL;
2128 else
2129 rtm->rtm_type = RTN_UNICAST;
2130 rtm->rtm_flags = 0;
2131 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2132 rtm->rtm_protocol = rt->rt6i_protocol;
2133 if (rt->rt6i_flags&RTF_DYNAMIC)
2134 rtm->rtm_protocol = RTPROT_REDIRECT;
2135 else if (rt->rt6i_flags & RTF_ADDRCONF)
2136 rtm->rtm_protocol = RTPROT_KERNEL;
2137 else if (rt->rt6i_flags&RTF_DEFAULT)
2138 rtm->rtm_protocol = RTPROT_RA;
2139
2140 if (rt->rt6i_flags&RTF_CACHE)
2141 rtm->rtm_flags |= RTM_F_CLONED;
2142
2143 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002144 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002145 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002146 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002147 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002148#ifdef CONFIG_IPV6_SUBTREES
2149 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002150 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002151 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002152 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002153 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154#endif
2155 if (iif)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002156 NLA_PUT_U32(skb, RTA_IIF, iif);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002157 else if (dst) {
2158 struct in6_addr saddr_buf;
YOSHIFUJI Hideaki5e5f3f02008-03-03 21:44:34 +09002159 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2160 dst, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002161 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002162 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002163
Linus Torvalds1da177e2005-04-16 15:20:36 -07002164 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002165 goto nla_put_failure;
2166
Linus Torvalds1da177e2005-04-16 15:20:36 -07002167 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002168 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2169
Linus Torvalds1da177e2005-04-16 15:20:36 -07002170 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002171 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2172
2173 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002174
2175 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2176 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2177 expires, rt->u.dst.error) < 0)
2178 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002179
Thomas Graf2d7202b2006-08-22 00:01:27 -07002180 return nlmsg_end(skb, nlh);
2181
2182nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002183 nlmsg_cancel(skb, nlh);
2184 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002185}
2186
Patrick McHardy1b43af52006-08-10 23:11:17 -07002187int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002188{
2189 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2190 int prefix;
2191
Thomas Graf2d7202b2006-08-22 00:01:27 -07002192 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2193 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002194 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2195 } else
2196 prefix = 0;
2197
2198 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2199 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002200 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002201}
2202
Thomas Grafc127ea22007-03-22 11:58:32 -07002203static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002204{
Denis V. Lunevb8542722007-12-01 00:21:31 +11002205 struct net *net = in_skb->sk->sk_net;
Thomas Grafab364a62006-08-22 00:01:47 -07002206 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002207 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002208 struct sk_buff *skb;
2209 struct rtmsg *rtm;
2210 struct flowi fl;
2211 int err, iif = 0;
2212
2213 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2214 if (err < 0)
2215 goto errout;
2216
2217 err = -EINVAL;
2218 memset(&fl, 0, sizeof(fl));
2219
2220 if (tb[RTA_SRC]) {
2221 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2222 goto errout;
2223
2224 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2225 }
2226
2227 if (tb[RTA_DST]) {
2228 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2229 goto errout;
2230
2231 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2232 }
2233
2234 if (tb[RTA_IIF])
2235 iif = nla_get_u32(tb[RTA_IIF]);
2236
2237 if (tb[RTA_OIF])
2238 fl.oif = nla_get_u32(tb[RTA_OIF]);
2239
2240 if (iif) {
2241 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002242 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002243 if (!dev) {
2244 err = -ENODEV;
2245 goto errout;
2246 }
2247 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002248
2249 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002250 if (skb == NULL) {
2251 err = -ENOBUFS;
2252 goto errout;
2253 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254
2255 /* Reserve room for dummy headers, this skb can pass
2256 through good chunk of routing engine.
2257 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002258 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002259 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2260
Thomas Grafab364a62006-08-22 00:01:47 -07002261 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262 skb->dst = &rt->u.dst;
2263
Thomas Grafab364a62006-08-22 00:01:47 -07002264 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002265 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002266 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002267 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002268 kfree_skb(skb);
2269 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002270 }
2271
Daniel Lezcano55786892008-03-04 13:47:47 -08002272 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002273errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002275}
2276
Thomas Graf86872cb2006-08-22 00:01:08 -07002277void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278{
2279 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002280 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002281 u32 seq;
2282 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002283
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002284 err = -ENOBUFS;
2285 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002286
Thomas Graf339bf982006-11-10 14:10:15 -08002287 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002288 if (skb == NULL)
2289 goto errout;
2290
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002291 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2292 event, info->pid, seq, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002293 if (err < 0) {
2294 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2295 WARN_ON(err == -EMSGSIZE);
2296 kfree_skb(skb);
2297 goto errout;
2298 }
Daniel Lezcano55786892008-03-04 13:47:47 -08002299 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2300 info->nlh, gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002301errout:
2302 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002303 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002304}
2305
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002306static int ip6_route_dev_notify(struct notifier_block *this,
2307 unsigned long event, void *data)
2308{
2309 struct net_device *dev = (struct net_device *)data;
2310 struct net *net = dev->nd_net;
2311
2312 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2313 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2314 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2315#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2316 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2317 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2318 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2319 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2320#endif
2321 }
2322
2323 return NOTIFY_OK;
2324}
2325
Linus Torvalds1da177e2005-04-16 15:20:36 -07002326/*
2327 * /proc
2328 */
2329
2330#ifdef CONFIG_PROC_FS
2331
/*
 * Sum of per-field character widths.
 * NOTE(review): appears to describe one /proc/net/ipv6_route record, but
 * nothing in this part of the file references it - confirm before removal.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/*
 * Buffer-cursor bookkeeping.
 * NOTE(review): rt6_info_route() below works on a struct seq_file and does
 * not use this structure; it looks like a leftover - verify other users.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2342
2343static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2344{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002345 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002346
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002347 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2348 rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002349
2350#ifdef CONFIG_IPV6_SUBTREES
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002351 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2352 rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002353#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002354 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002355#endif
2356
2357 if (rt->rt6i_nexthop) {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002358 seq_printf(m, NIP6_SEQFMT,
2359 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002360 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002361 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002362 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002363 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2364 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2365 rt->u.dst.__use, rt->rt6i_flags,
2366 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002367 return 0;
2368}
2369
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002370static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002371{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002372 struct net *net = (struct net *)m->private;
2373 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002374 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002375}
2376
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002377static int ipv6_route_open(struct inode *inode, struct file *file)
2378{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002379 struct net *net = get_proc_net(inode);
2380 if (!net)
2381 return -ENXIO;
2382 return single_open(file, ipv6_route_show, net);
2383}
2384
2385static int ipv6_route_release(struct inode *inode, struct file *file)
2386{
2387 struct seq_file *seq = file->private_data;
2388 struct net *net = seq->private;
2389 put_net(net);
2390 return single_release(inode, file);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002391}
2392
2393static const struct file_operations ipv6_route_proc_fops = {
2394 .owner = THIS_MODULE,
2395 .open = ipv6_route_open,
2396 .read = seq_read,
2397 .llseek = seq_lseek,
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002398 .release = ipv6_route_release,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002399};
2400
Linus Torvalds1da177e2005-04-16 15:20:36 -07002401static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2402{
Daniel Lezcano69ddb8052008-03-04 13:46:23 -08002403 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002404 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb8052008-03-04 13:46:23 -08002405 net->ipv6.rt6_stats->fib_nodes,
2406 net->ipv6.rt6_stats->fib_route_nodes,
2407 net->ipv6.rt6_stats->fib_rt_alloc,
2408 net->ipv6.rt6_stats->fib_rt_entries,
2409 net->ipv6.rt6_stats->fib_rt_cache,
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002410 atomic_read(&ip6_dst_ops->entries),
Daniel Lezcano69ddb8052008-03-04 13:46:23 -08002411 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002412
2413 return 0;
2414}
2415
2416static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2417{
Daniel Lezcano69ddb8052008-03-04 13:46:23 -08002418 struct net *net = get_proc_net(inode);
2419 return single_open(file, rt6_stats_seq_show, net);
2420}
2421
2422static int rt6_stats_seq_release(struct inode *inode, struct file *file)
2423{
2424 struct seq_file *seq = file->private_data;
2425 struct net *net = (struct net *)seq->private;
2426 put_net(net);
2427 return single_release(inode, file);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002428}
2429
Arjan van de Ven9a321442007-02-12 00:55:35 -08002430static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002431 .owner = THIS_MODULE,
2432 .open = rt6_stats_seq_open,
2433 .read = seq_read,
2434 .llseek = seq_lseek,
Daniel Lezcano69ddb8052008-03-04 13:46:23 -08002435 .release = rt6_stats_seq_release,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002436};
2437#endif /* CONFIG_PROC_FS */
2438
2439#ifdef CONFIG_SYSCTL
2440
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441static
2442int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2443 void __user *buffer, size_t *lenp, loff_t *ppos)
2444{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002445 struct net *net = current->nsproxy->net_ns;
2446 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002447 if (write) {
2448 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002449 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002450 return 0;
2451 } else
2452 return -EINVAL;
2453}
2454
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002455ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002456 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002457 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002458 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002459 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002460 .mode = 0200,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002461 .proc_handler = &ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002462 },
2463 {
2464 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2465 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002466 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002467 .maxlen = sizeof(int),
2468 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002469 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470 },
2471 {
2472 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2473 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002474 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002475 .maxlen = sizeof(int),
2476 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002477 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002478 },
2479 {
2480 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2481 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002482 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002483 .maxlen = sizeof(int),
2484 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002485 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002486 .strategy = &sysctl_jiffies,
2487 },
2488 {
2489 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2490 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002491 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002492 .maxlen = sizeof(int),
2493 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002494 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002495 .strategy = &sysctl_jiffies,
2496 },
2497 {
2498 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2499 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002500 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002501 .maxlen = sizeof(int),
2502 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002503 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002504 .strategy = &sysctl_jiffies,
2505 },
2506 {
2507 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2508 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002509 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002510 .maxlen = sizeof(int),
2511 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002512 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002513 .strategy = &sysctl_jiffies,
2514 },
2515 {
2516 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2517 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002518 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002519 .maxlen = sizeof(int),
2520 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002521 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002522 .strategy = &sysctl_jiffies,
2523 },
2524 {
2525 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2526 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002527 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002528 .maxlen = sizeof(int),
2529 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002530 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002531 .strategy = &sysctl_jiffies,
2532 },
2533 {
2534 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2535 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002536 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002537 .maxlen = sizeof(int),
2538 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002539 .proc_handler = &proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002540 .strategy = &sysctl_ms_jiffies,
2541 },
2542 { .ctl_name = 0 }
2543};
2544
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002545struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2546{
2547 struct ctl_table *table;
2548
2549 table = kmemdup(ipv6_route_table_template,
2550 sizeof(ipv6_route_table_template),
2551 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002552
2553 if (table) {
2554 table[0].data = &net->ipv6.sysctl.flush_delay;
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002555 table[1].data = &ip6_dst_ops_template.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002556 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2557 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2558 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2559 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2560 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2561 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2562 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2563 }
2564
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002565 return table;
2566}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002567#endif
2568
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002569static int ip6_route_net_init(struct net *net)
2570{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002571 int ret = 0;
2572
2573 ret = -ENOMEM;
2574 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2575 sizeof(*net->ipv6.ip6_null_entry),
2576 GFP_KERNEL);
2577 if (!net->ipv6.ip6_null_entry)
2578 goto out;
2579 net->ipv6.ip6_null_entry->u.dst.path =
2580 (struct dst_entry *)net->ipv6.ip6_null_entry;
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002581 net->ipv6.ip6_null_entry->u.dst.ops = ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002582
2583#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2584 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2585 sizeof(*net->ipv6.ip6_prohibit_entry),
2586 GFP_KERNEL);
2587 if (!net->ipv6.ip6_prohibit_entry) {
2588 kfree(net->ipv6.ip6_null_entry);
2589 goto out;
2590 }
2591 net->ipv6.ip6_prohibit_entry->u.dst.path =
2592 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002593 net->ipv6.ip6_prohibit_entry->u.dst.ops = ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002594
2595 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2596 sizeof(*net->ipv6.ip6_blk_hole_entry),
2597 GFP_KERNEL);
2598 if (!net->ipv6.ip6_blk_hole_entry) {
2599 kfree(net->ipv6.ip6_null_entry);
2600 kfree(net->ipv6.ip6_prohibit_entry);
2601 goto out;
2602 }
2603 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2604 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002605 net->ipv6.ip6_blk_hole_entry->u.dst.ops = ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002606#endif
2607
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002608#ifdef CONFIG_PROC_FS
2609 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2610 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2611#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002612 ret = 0;
2613out:
2614 return ret;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002615}
2616
2617static void ip6_route_net_exit(struct net *net)
2618{
2619#ifdef CONFIG_PROC_FS
2620 proc_net_remove(net, "ipv6_route");
2621 proc_net_remove(net, "rt6_stats");
2622#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002623 kfree(net->ipv6.ip6_null_entry);
2624#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2625 kfree(net->ipv6.ip6_prohibit_entry);
2626 kfree(net->ipv6.ip6_blk_hole_entry);
2627#endif
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002628}
2629
2630static struct pernet_operations ip6_route_net_ops = {
2631 .init = ip6_route_net_init,
2632 .exit = ip6_route_net_exit,
2633};
2634
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002635static struct notifier_block ip6_route_dev_notifier = {
2636 .notifier_call = ip6_route_dev_notify,
2637 .priority = 0,
2638};
2639
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002640int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002641{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002642 int ret;
2643
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002644 ip6_dst_ops = kmemdup(&ip6_dst_ops_template,
2645 sizeof(*ip6_dst_ops), GFP_KERNEL);
2646 if (!ip6_dst_ops)
Daniel Lezcanof845ab62007-12-07 00:45:16 -08002647 return -ENOMEM;
2648
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002649 ret = -ENOMEM;
2650 ip6_dst_ops_template.kmem_cachep =
2651 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2652 SLAB_HWCACHE_ALIGN, NULL);
2653 if (!ip6_dst_ops_template.kmem_cachep)
2654 goto out_ip6_dst_ops;
2655
2656 ip6_dst_ops->kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2657 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
David S. Miller14e50e52007-05-24 18:17:54 -07002658
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002659 ret = register_pernet_subsys(&ip6_route_net_ops);
2660 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002661 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002662
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002663 /* Registering of the loopback is done before this portion of code,
2664 * the loopback reference in rt6_info will not be taken, do it
2665 * manually for init_net */
2666 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2667 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2668 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2669 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2670 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2671 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2672 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2673 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002674 ret = fib6_init();
2675 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002676 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002677
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002678 ret = xfrm6_init();
2679 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002680 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002681
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002682 ret = fib6_rules_init();
2683 if (ret)
2684 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002685
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002686 ret = -ENOBUFS;
2687 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2688 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2689 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2690 goto fib6_rules_init;
2691
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002692 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002693 if (ret)
2694 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002695
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002696out:
2697 return ret;
2698
2699fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002700 fib6_rules_cleanup();
2701xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002702 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002703out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002704 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002705out_register_subsys:
2706 unregister_pernet_subsys(&ip6_route_net_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002707out_kmem_cache:
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002708 kmem_cache_destroy(ip6_dst_ops->kmem_cachep);
2709out_ip6_dst_ops:
2710 kfree(ip6_dst_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002711 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002712}
2713
2714void ip6_route_cleanup(void)
2715{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002716 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002717 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002718 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002719 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002720 unregister_pernet_subsys(&ip6_route_net_ops);
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002721 kmem_cache_destroy(ip6_dst_ops->kmem_cachep);
2722 kfree(ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002723}