blob: b25636c4120d8bdc8b85a0fdf31a3a05542e465a [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09006 * Pedro Roque <roque@di.fc.ul.pt>
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
YOSHIFUJI Hideakic0bece92006-08-23 17:23:25 -070023 * Ville Nuorvala
24 * Fixed routing subtrees.
Linus Torvalds1da177e2005-04-16 15:20:36 -070025 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
Paul Gortmakerbc3b2d72011-07-15 11:47:34 -040029#include <linux/export.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090038#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080043#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090044#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020045#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070056#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070057#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070058
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
Gao feng1716a962012-04-06 00:13:10 +000065static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
Eric Dumazet21efcfa2011-07-19 20:18:36 +000066 const struct in6_addr *dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -070067static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -080068static unsigned int ip6_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +000069static unsigned int ip6_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -070070static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71static void ip6_dst_destroy(struct dst_entry *);
72static void ip6_dst_ifdown(struct dst_entry *,
73 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080074static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
76static int ip6_pkt_discard(struct sk_buff *skb);
77static int ip6_pkt_discard_out(struct sk_buff *skb);
78static void ip6_link_failure(struct sk_buff *skb);
79static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080081#ifdef CONFIG_IPV6_ROUTE_INFO
Lorenzo Colitti653be182014-03-26 19:35:41 +090082static struct rt6_info *rt6_add_route_info(struct net_device *dev,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000083 const struct in6_addr *prefix, int prefixlen,
Lorenzo Colitti653be182014-03-26 19:35:41 +090084 const struct in6_addr *gwaddr,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080085 unsigned pref);
Lorenzo Colitti653be182014-03-26 19:35:41 +090086static struct rt6_info *rt6_get_route_info(struct net_device *dev,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000087 const struct in6_addr *prefix, int prefixlen,
Lorenzo Colitti653be182014-03-26 19:35:41 +090088 const struct in6_addr *gwaddr);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080089#endif
90
David S. Miller06582542011-01-27 14:58:42 -080091static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92{
93 struct rt6_info *rt = (struct rt6_info *) dst;
94 struct inet_peer *peer;
95 u32 *p = NULL;
96
Yan, Zheng8e2ec632011-09-05 21:34:30 +000097 if (!(rt->dst.flags & DST_HOST))
98 return NULL;
99
David S. Miller06582542011-01-27 14:58:42 -0800100 if (!rt->rt6i_peer)
101 rt6_bind_peer(rt, 1);
102
103 peer = rt->rt6i_peer;
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122}
123
David S. Miller39232972012-01-26 15:22:32 -0500124static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
125{
126 struct in6_addr *p = &rt->rt6i_gateway;
127
David S. Millera7563f32012-01-26 16:29:16 -0500128 if (!ipv6_addr_any(p))
David S. Miller39232972012-01-26 15:22:32 -0500129 return (const void *) p;
130 return daddr;
131}
132
David S. Millerd3aaeb32011-07-18 00:40:17 -0700133static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
134{
David S. Miller39232972012-01-26 15:22:32 -0500135 struct rt6_info *rt = (struct rt6_info *) dst;
136 struct neighbour *n;
137
138 daddr = choose_neigh_daddr(rt, daddr);
139 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
David S. Millerf83c7792011-12-28 15:41:23 -0500140 if (n)
141 return n;
142 return neigh_create(&nd_tbl, daddr, dst->dev);
143}
144
David S. Miller8ade06c2011-12-29 18:51:57 -0500145static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
David S. Millerf83c7792011-12-28 15:41:23 -0500146{
David S. Miller8ade06c2011-12-29 18:51:57 -0500147 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
148 if (!n) {
149 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
150 if (IS_ERR(n))
151 return PTR_ERR(n);
152 }
David S. Millerf83c7792011-12-28 15:41:23 -0500153 dst_set_neighbour(&rt->dst, n);
154
155 return 0;
David S. Millerd3aaeb32011-07-18 00:40:17 -0700156}
157
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800158static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800160 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161 .gc = ip6_dst_gc,
162 .gc_thresh = 1024,
163 .check = ip6_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800164 .default_advmss = ip6_default_advmss,
Steffen Klassertebb762f2011-11-23 02:12:51 +0000165 .mtu = ip6_mtu,
David S. Miller06582542011-01-27 14:58:42 -0800166 .cow_metrics = ipv6_cow_metrics,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 .destroy = ip6_dst_destroy,
168 .ifdown = ip6_dst_ifdown,
169 .negative_advice = ip6_negative_advice,
170 .link_failure = ip6_link_failure,
171 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700172 .local_out = __ip6_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700173 .neigh_lookup = ip6_neigh_lookup,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174};
175
Steffen Klassertebb762f2011-11-23 02:12:51 +0000176static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -0800177{
Steffen Klassert618f9bc2011-11-23 02:13:31 +0000178 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
179
180 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -0800181}
182
David S. Miller14e50e52007-05-24 18:17:54 -0700183static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
184{
185}
186
Held Bernhard0972ddb2011-04-24 22:07:32 +0000187static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
188 unsigned long old)
189{
190 return NULL;
191}
192
David S. Miller14e50e52007-05-24 18:17:54 -0700193static struct dst_ops ip6_dst_blackhole_ops = {
194 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800195 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700196 .destroy = ip6_dst_destroy,
197 .check = ip6_dst_check,
Steffen Klassertebb762f2011-11-23 02:12:51 +0000198 .mtu = ip6_blackhole_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -0800199 .default_advmss = ip6_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -0700200 .update_pmtu = ip6_rt_blackhole_update_pmtu,
Held Bernhard0972ddb2011-04-24 22:07:32 +0000201 .cow_metrics = ip6_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700202 .neigh_lookup = ip6_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -0700203};
204
David S. Miller62fa8a82011-01-26 20:51:05 -0800205static const u32 ip6_template_metrics[RTAX_MAX] = {
206 [RTAX_HOPLIMIT - 1] = 255,
207};
208
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800209static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700210 .dst = {
211 .__refcnt = ATOMIC_INIT(1),
212 .__use = 1,
213 .obsolete = -1,
214 .error = -ENETUNREACH,
Changli Gaod8d1f302010-06-10 23:31:35 -0700215 .input = ip6_pkt_discard,
216 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217 },
218 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700219 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220 .rt6i_metric = ~(u32) 0,
221 .rt6i_ref = ATOMIC_INIT(1),
222};
223
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Handlers for the "prohibit" entry; declared here ahead of their definitions. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route entry: a reject route reporting
 * -EACCES whose input/output handlers are the ip6_pkt_prohibit pair.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* largest possible metric value */
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route entry: a reject route reporting
 * -EINVAL that hands packets to dst_discard in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* largest possible metric value */
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
260
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261/* allocate dst with ip6_dst_ops */
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700262static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
David S. Miller957c6652011-06-24 15:25:00 -0700263 struct net_device *dev,
264 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265{
David S. Miller957c6652011-06-24 15:25:00 -0700266 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
David S. Millercf911662011-04-28 14:31:47 -0700267
David S. Miller38308472011-12-03 18:02:47 -0500268 if (rt)
Madalin Bucurfbe58182011-09-26 07:04:56 +0000269 memset(&rt->rt6i_table, 0,
David S. Miller38308472011-12-03 18:02:47 -0500270 sizeof(*rt) - sizeof(struct dst_entry));
David S. Millercf911662011-04-28 14:31:47 -0700271
272 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273}
274
275static void ip6_dst_destroy(struct dst_entry *dst)
276{
277 struct rt6_info *rt = (struct rt6_info *)dst;
278 struct inet6_dev *idev = rt->rt6i_idev;
David S. Millerb3419362010-11-30 12:27:11 -0800279 struct inet_peer *peer = rt->rt6i_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280
Yan, Zheng8e2ec632011-09-05 21:34:30 +0000281 if (!(rt->dst.flags & DST_HOST))
282 dst_destroy_metrics_generic(dst);
283
David S. Miller38308472011-12-03 18:02:47 -0500284 if (idev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285 rt->rt6i_idev = NULL;
286 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900287 }
Gao feng1716a962012-04-06 00:13:10 +0000288
289 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
290 dst_release(dst->from);
291
David S. Millerb3419362010-11-30 12:27:11 -0800292 if (peer) {
David S. Millerb3419362010-11-30 12:27:11 -0800293 rt->rt6i_peer = NULL;
294 inet_putpeer(peer);
295 }
296}
297
David S. Miller6431cbc2011-02-07 20:38:06 -0800298static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
299
300static u32 rt6_peer_genid(void)
301{
302 return atomic_read(&__rt6_peer_genid);
303}
304
David S. Millerb3419362010-11-30 12:27:11 -0800305void rt6_bind_peer(struct rt6_info *rt, int create)
306{
307 struct inet_peer *peer;
308
David S. Millerb3419362010-11-30 12:27:11 -0800309 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
310 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
311 inet_putpeer(peer);
David S. Miller6431cbc2011-02-07 20:38:06 -0800312 else
313 rt->rt6i_peer_genid = rt6_peer_genid();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314}
315
316static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
317 int how)
318{
319 struct rt6_info *rt = (struct rt6_info *)dst;
320 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800321 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900322 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323
David S. Miller38308472011-12-03 18:02:47 -0500324 if (dev != loopback_dev && idev && idev->dev == dev) {
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800325 struct inet6_dev *loopback_idev =
326 in6_dev_get(loopback_dev);
David S. Miller38308472011-12-03 18:02:47 -0500327 if (loopback_idev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328 rt->rt6i_idev = loopback_idev;
329 in6_dev_put(idev);
330 }
331 }
332}
333
334static __inline__ int rt6_check_expired(const struct rt6_info *rt)
335{
Gao feng1716a962012-04-06 00:13:10 +0000336 struct rt6_info *ort = NULL;
337
338 if (rt->rt6i_flags & RTF_EXPIRES) {
339 if (time_after(jiffies, rt->dst.expires))
340 return 1;
341 } else if (rt->dst.from) {
342 ort = (struct rt6_info *) rt->dst.from;
343 return (ort->rt6i_flags & RTF_EXPIRES) &&
344 time_after(jiffies, ort->dst.expires);
345 }
346 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700347}
348
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000349static inline int rt6_need_strict(const struct in6_addr *daddr)
Thomas Grafc71099a2006-08-04 23:20:06 -0700350{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000351 return ipv6_addr_type(daddr) &
352 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700353}
354
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700356 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357 */
358
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800359static inline struct rt6_info *rt6_device_match(struct net *net,
360 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000361 const struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700363 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364{
365 struct rt6_info *local = NULL;
366 struct rt6_info *sprt;
367
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900368 if (!oif && ipv6_addr_any(saddr))
369 goto out;
370
Changli Gaod8d1f302010-06-10 23:31:35 -0700371 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
David S. Millerd1918542011-12-28 20:19:20 -0500372 struct net_device *dev = sprt->dst.dev;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900373
374 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375 if (dev->ifindex == oif)
376 return sprt;
377 if (dev->flags & IFF_LOOPBACK) {
David S. Miller38308472011-12-03 18:02:47 -0500378 if (!sprt->rt6i_idev ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700380 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900382 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383 local->rt6i_idev->dev->ifindex == oif))
384 continue;
385 }
386 local = sprt;
387 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900388 } else {
389 if (ipv6_chk_addr(net, saddr, dev,
390 flags & RT6_LOOKUP_F_IFACE))
391 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900393 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900395 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396 if (local)
397 return local;
398
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700399 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800400 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900402out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403 return rt;
404}
405
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the neighbour attached to @rt is not in a NUD_VALID state and was
 * last updated more than rtr_probe_interval ago, stamp it with the current
 * time and send a Neighbour Solicitation to its solicited-node multicast
 * address.  A NULL @rt or a route without a neighbour is ignored.
 *
 * The probe MUST be rate-limited to no more than one per minute.
 *
 * NOTE(review): neigh->updated is written while neigh->lock is only
 * read-locked - confirm this is intended.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	int probe_due;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (neigh && !(neigh->nud_state & NUD_VALID)) {
		/* Re-check the state under the lock before probing. */
		read_lock_bh(&neigh->lock);
		probe_due = !(neigh->nud_state & NUD_VALID) &&
			    time_after(jiffies, neigh->updated +
				       rt->rt6i_idev->cnf.rtr_probe_interval);
		if (probe_due)
			neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		if (probe_due) {
			struct in6_addr mcaddr;
			struct in6_addr *target;

			target = (struct in6_addr *)&neigh->primary_key;
			addrconf_addr_solict_mult(target, &mcaddr);
			ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
		}
	}
	rcu_read_unlock();
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
445
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800447 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700449static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450{
David S. Millerd1918542011-12-28 20:19:20 -0500451 struct net_device *dev = rt->dst.dev;
David S. Miller161980f2007-04-06 11:42:27 -0700452 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800453 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700454 if ((dev->flags & IFF_LOOPBACK) &&
455 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
456 return 1;
457 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700458}
459
Dave Jonesb6f99a22007-03-22 12:27:49 -0700460static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461{
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000462 struct neighbour *neigh;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800463 int m;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000464
465 rcu_read_lock();
David Miller27217452011-12-02 16:52:08 +0000466 neigh = dst_get_neighbour_noref(&rt->dst);
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700467 if (rt->rt6i_flags & RTF_NONEXTHOP ||
468 !(rt->rt6i_flags & RTF_GATEWAY))
469 m = 1;
470 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800471 read_lock_bh(&neigh->lock);
472 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700473 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800474#ifdef CONFIG_IPV6_ROUTER_PREF
475 else if (neigh->nud_state & NUD_FAILED)
476 m = 0;
477#endif
478 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800479 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800480 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800481 } else
482 m = 0;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000483 rcu_read_unlock();
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800484 return m;
485}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800487static int rt6_score_route(struct rt6_info *rt, int oif,
488 int strict)
489{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700490 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900491
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700492 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700493 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800494 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800495#ifdef CONFIG_IPV6_ROUTER_PREF
496 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
497#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700498 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800499 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800500 return -1;
501 return m;
502}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503
David S. Millerf11e6652007-03-24 20:36:25 -0700504static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
505 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800506{
David S. Millerf11e6652007-03-24 20:36:25 -0700507 int m;
508
509 if (rt6_check_expired(rt))
510 goto out;
511
512 m = rt6_score_route(rt, oif, strict);
513 if (m < 0)
514 goto out;
515
516 if (m > *mpri) {
517 if (strict & RT6_LOOKUP_F_REACHABLE)
518 rt6_probe(match);
519 *mpri = m;
520 match = rt;
521 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
522 rt6_probe(rt);
523 }
524
525out:
526 return match;
527}
528
529static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
530 struct rt6_info *rr_head,
531 u32 metric, int oif, int strict)
532{
533 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800534 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535
David S. Millerf11e6652007-03-24 20:36:25 -0700536 match = NULL;
537 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700538 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700539 match = find_match(rt, oif, strict, &mpri, match);
540 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700541 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700542 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800543
David S. Millerf11e6652007-03-24 20:36:25 -0700544 return match;
545}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800546
David S. Millerf11e6652007-03-24 20:36:25 -0700547static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
548{
549 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800550 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551
David S. Millerf11e6652007-03-24 20:36:25 -0700552 rt0 = fn->rr_ptr;
553 if (!rt0)
554 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555
David S. Millerf11e6652007-03-24 20:36:25 -0700556 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800558 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700559 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700560 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700561
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800562 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700563 if (!next || next->rt6i_metric != rt0->rt6i_metric)
564 next = fn->leaf;
565
566 if (next != rt0)
567 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700568 }
569
David S. Millerd1918542011-12-28 20:19:20 -0500570 net = dev_net(rt0->dst.dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000571 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572}
573
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received Route Information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option (interpreted as struct route_info)
 * @len:    length of the option in bytes
 * @gwaddr: address of the advertising router
 *
 * Validates the option, then looks up a matching route: a zero lifetime
 * deletes an existing route, a non-zero lifetime adds a missing one or
 * refreshes the preference of an existing one.  Finally the expiry of the
 * resulting route is updated from the lifetime.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* A full 128-bit prefix is present: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(dev, prefix, rinfo->prefix_len, gwaddr);

	/* A zero lifetime withdraws the route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(dev, prefix, rinfo->prefix_len, gwaddr, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	dst_release(&rt->dst);
	return 0;
}
#endif
643
/*
 * BACKTRACK - climb the fib6 tree when the lookup produced the null entry.
 *
 * Expects the local variables "rt" (current result) and "fn" (current
 * node) and the labels "out" and "restart" to exist in the calling
 * function.  When rt is the namespace's null entry the macro walks from fn
 * towards the root: at each step it either looks @saddr up in the parent's
 * subtree (when the parent has one and fn is not that subtree) or moves to
 * the parent.  It jumps to "restart" on reaching a node flagged
 * RTN_RTINFO and to "out" on reaching a node flagged RTN_TL_ROOT.
 *
 * Both macro arguments are parenthesized so that arbitrary expressions
 * can be passed safely.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700661
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800662static struct rt6_info *ip6_pol_route_lookup(struct net *net,
663 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500664 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665{
666 struct fib6_node *fn;
667 struct rt6_info *rt;
668
Thomas Grafc71099a2006-08-04 23:20:06 -0700669 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -0500670 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700671restart:
672 rt = fn->leaf;
David S. Miller4c9483b2011-03-12 16:22:43 -0500673 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
674 BACKTRACK(net, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700675out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700676 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700677 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700678 return rt;
679
680}
681
Florian Westphalea6e5742011-09-05 16:05:44 +0200682struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
683 int flags)
684{
685 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
686}
687EXPORT_SYMBOL_GPL(ip6_route_lookup);
688
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900689struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
690 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700691{
David S. Miller4c9483b2011-03-12 16:22:43 -0500692 struct flowi6 fl6 = {
693 .flowi6_oif = oif,
694 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700695 };
696 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700697 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700698
Thomas Grafadaa70b2006-10-13 15:01:03 -0700699 if (saddr) {
David S. Miller4c9483b2011-03-12 16:22:43 -0500700 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
Thomas Grafadaa70b2006-10-13 15:01:03 -0700701 flags |= RT6_LOOKUP_F_HAS_SADDR;
702 }
703
David S. Miller4c9483b2011-03-12 16:22:43 -0500704 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700705 if (dst->error == 0)
706 return (struct rt6_info *) dst;
707
708 dst_release(dst);
709
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710 return NULL;
711}
712
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900713EXPORT_SYMBOL(rt6_lookup);
714
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to it, it may be destroyed.
 */
720
Thomas Graf86872cb2006-08-22 00:01:08 -0700721static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722{
723 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700724 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725
Thomas Grafc71099a2006-08-04 23:20:06 -0700726 table = rt->rt6i_table;
727 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700728 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700729 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730
731 return err;
732}
733
Thomas Graf40e22e82006-08-22 00:00:45 -0700734int ip6_ins_rt(struct rt6_info *rt)
735{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800736 struct nl_info info = {
David S. Millerd1918542011-12-28 20:19:20 -0500737 .nl_net = dev_net(rt->dst.dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800738 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800739 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700740}
741
Gao feng1716a962012-04-06 00:13:10 +0000742static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000743 const struct in6_addr *daddr,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000744 const struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746 struct rt6_info *rt;
747
748 /*
749 * Clone the route.
750 */
751
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000752 rt = ip6_rt_copy(ort, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700753
754 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800755 int attempts = !in_softirq();
756
David S. Miller38308472011-12-03 18:02:47 -0500757 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
David S. Millerbb3c3682011-12-13 17:35:06 -0500758 if (ort->rt6i_dst.plen != 128 &&
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000759 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900760 rt->rt6i_flags |= RTF_ANYCAST;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000761 rt->rt6i_gateway = *daddr;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900762 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764 rt->rt6i_flags |= RTF_CACHE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765
766#ifdef CONFIG_IPV6_SUBTREES
767 if (rt->rt6i_src.plen && saddr) {
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000768 rt->rt6i_src.addr = *saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769 rt->rt6i_src.plen = 128;
770 }
771#endif
772
David S. Miller14deae42009-01-04 16:04:39 -0800773 retry:
David S. Miller8ade06c2011-12-29 18:51:57 -0500774 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
David S. Millerd1918542011-12-28 20:19:20 -0500775 struct net *net = dev_net(rt->dst.dev);
David S. Miller14deae42009-01-04 16:04:39 -0800776 int saved_rt_min_interval =
777 net->ipv6.sysctl.ip6_rt_gc_min_interval;
778 int saved_rt_elasticity =
779 net->ipv6.sysctl.ip6_rt_gc_elasticity;
780
781 if (attempts-- > 0) {
782 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
783 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
784
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000785 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800786
787 net->ipv6.sysctl.ip6_rt_gc_elasticity =
788 saved_rt_elasticity;
789 net->ipv6.sysctl.ip6_rt_gc_min_interval =
790 saved_rt_min_interval;
791 goto retry;
792 }
793
794 if (net_ratelimit())
795 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700796 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700797 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800798 return NULL;
799 }
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800800 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800802 return rt;
803}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000805static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
806 const struct in6_addr *daddr)
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800807{
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000808 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
809
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800810 if (rt) {
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800811 rt->rt6i_flags |= RTF_CACHE;
David Miller27217452011-12-02 16:52:08 +0000812 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800813 }
814 return rt;
815}
816
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800817static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
David S. Miller4c9483b2011-03-12 16:22:43 -0500818 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700819{
820 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800821 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700822 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800824 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700825 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700827 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828
829relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700830 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700831
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800832restart_2:
David S. Miller4c9483b2011-03-12 16:22:43 -0500833 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834
835restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700836 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800837
David S. Miller4c9483b2011-03-12 16:22:43 -0500838 BACKTRACK(net, &fl6->saddr);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800839 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800840 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef042006-03-20 17:01:24 -0800841 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700842
Changli Gaod8d1f302010-06-10 23:31:35 -0700843 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700844 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800845
David Miller27217452011-12-02 16:52:08 +0000846 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
David S. Miller4c9483b2011-03-12 16:22:43 -0500847 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800848 else if (!(rt->dst.flags & DST_HOST))
David S. Miller4c9483b2011-03-12 16:22:43 -0500849 nrt = rt6_alloc_clone(rt, &fl6->daddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800850 else
851 goto out2;
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800852
Changli Gaod8d1f302010-06-10 23:31:35 -0700853 dst_release(&rt->dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800854 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800855
Changli Gaod8d1f302010-06-10 23:31:35 -0700856 dst_hold(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800857 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700858 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800859 if (!err)
860 goto out2;
861 }
862
863 if (--attempts <= 0)
864 goto out2;
865
866 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700867 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800868 * released someone could insert this route. Relookup.
869 */
Changli Gaod8d1f302010-06-10 23:31:35 -0700870 dst_release(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800871 goto relookup;
872
873out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800874 if (reachable) {
875 reachable = 0;
876 goto restart_2;
877 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700878 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700879 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880out2:
Changli Gaod8d1f302010-06-10 23:31:35 -0700881 rt->dst.lastuse = jiffies;
882 rt->dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700883
884 return rt;
885}
886
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800887static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500888 struct flowi6 *fl6, int flags)
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700889{
David S. Miller4c9483b2011-03-12 16:22:43 -0500890 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700891}
892
Shmulik Ladkani72331bc2012-04-01 04:03:45 +0000893static struct dst_entry *ip6_route_input_lookup(struct net *net,
894 struct net_device *dev,
895 struct flowi6 *fl6, int flags)
896{
897 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
898 flags |= RT6_LOOKUP_F_IFACE;
899
900 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
901}
902
Thomas Grafc71099a2006-08-04 23:20:06 -0700903void ip6_route_input(struct sk_buff *skb)
904{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000905 const struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900906 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700907 int flags = RT6_LOOKUP_F_HAS_SADDR;
David S. Miller4c9483b2011-03-12 16:22:43 -0500908 struct flowi6 fl6 = {
909 .flowi6_iif = skb->dev->ifindex,
910 .daddr = iph->daddr,
911 .saddr = iph->saddr,
David S. Miller38308472011-12-03 18:02:47 -0500912 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
David S. Miller4c9483b2011-03-12 16:22:43 -0500913 .flowi6_mark = skb->mark,
914 .flowi6_proto = iph->nexthdr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700915 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700916
Shmulik Ladkani72331bc2012-04-01 04:03:45 +0000917 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
Thomas Grafc71099a2006-08-04 23:20:06 -0700918}
919
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800920static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500921 struct flowi6 *fl6, int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -0700922{
David S. Miller4c9483b2011-03-12 16:22:43 -0500923 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700924}
925
Florian Westphal9c7a4f92011-03-22 19:17:36 -0700926struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
David S. Miller4c9483b2011-03-12 16:22:43 -0500927 struct flowi6 *fl6)
Thomas Grafc71099a2006-08-04 23:20:06 -0700928{
929 int flags = 0;
930
David S. Miller4c9483b2011-03-12 16:22:43 -0500931 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700932 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700933
David S. Miller4c9483b2011-03-12 16:22:43 -0500934 if (!ipv6_addr_any(&fl6->saddr))
Thomas Grafadaa70b2006-10-13 15:01:03 -0700935 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000936 else if (sk)
937 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700938
David S. Miller4c9483b2011-03-12 16:22:43 -0500939 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700940}
941
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900942EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943
David S. Miller2774c132011-03-01 14:59:04 -0800944struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -0700945{
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700946 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
David S. Miller14e50e52007-05-24 18:17:54 -0700947 struct dst_entry *new = NULL;
948
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700949 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
David S. Miller14e50e52007-05-24 18:17:54 -0700950 if (rt) {
David S. Millercf911662011-04-28 14:31:47 -0700951 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
952
Changli Gaod8d1f302010-06-10 23:31:35 -0700953 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700954
David S. Miller14e50e52007-05-24 18:17:54 -0700955 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800956 new->input = dst_discard;
957 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700958
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000959 if (dst_metrics_read_only(&ort->dst))
960 new->_metrics = ort->dst._metrics;
961 else
962 dst_copy_metrics(new, &ort->dst);
David S. Miller14e50e52007-05-24 18:17:54 -0700963 rt->rt6i_idev = ort->rt6i_idev;
964 if (rt->rt6i_idev)
965 in6_dev_hold(rt->rt6i_idev);
David S. Miller14e50e52007-05-24 18:17:54 -0700966
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000967 rt->rt6i_gateway = ort->rt6i_gateway;
Gao feng1716a962012-04-06 00:13:10 +0000968 rt->rt6i_flags = ort->rt6i_flags;
969 rt6_clean_expires(rt);
David S. Miller14e50e52007-05-24 18:17:54 -0700970 rt->rt6i_metric = 0;
971
972 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
973#ifdef CONFIG_IPV6_SUBTREES
974 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
975#endif
976
977 dst_free(new);
978 }
979
David S. Miller69ead7a2011-03-01 14:45:33 -0800980 dst_release(dst_orig);
981 return new ? new : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -0700982}
David S. Miller14e50e52007-05-24 18:17:54 -0700983
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984/*
985 * Destination cache support functions
986 */
987
988static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
989{
990 struct rt6_info *rt;
991
992 rt = (struct rt6_info *) dst;
993
David S. Miller6431cbc2011-02-07 20:38:06 -0800994 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
995 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
996 if (!rt->rt6i_peer)
997 rt6_bind_peer(rt, 0);
998 rt->rt6i_peer_genid = rt6_peer_genid();
999 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 return dst;
David S. Miller6431cbc2011-02-07 20:38:06 -08001001 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002 return NULL;
1003}
1004
1005static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1006{
1007 struct rt6_info *rt = (struct rt6_info *) dst;
1008
1009 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00001010 if (rt->rt6i_flags & RTF_CACHE) {
1011 if (rt6_check_expired(rt)) {
1012 ip6_del_rt(rt);
1013 dst = NULL;
1014 }
1015 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00001017 dst = NULL;
1018 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00001020 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021}
1022
1023static void ip6_link_failure(struct sk_buff *skb)
1024{
1025 struct rt6_info *rt;
1026
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001027 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028
Eric Dumazetadf30902009-06-02 05:19:30 +00001029 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030 if (rt) {
Gao feng1716a962012-04-06 00:13:10 +00001031 if (rt->rt6i_flags & RTF_CACHE)
1032 rt6_update_expires(rt, 0);
1033 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034 rt->rt6i_node->fn_sernum = -1;
1035 }
1036}
1037
1038static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1039{
1040 struct rt6_info *rt6 = (struct rt6_info*)dst;
1041
1042 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1043 rt6->rt6i_flags |= RTF_MODIFIED;
1044 if (mtu < IPV6_MIN_MTU) {
David S. Millerdefb3512010-12-08 21:16:57 -08001045 u32 features = dst_metric(dst, RTAX_FEATURES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046 mtu = IPV6_MIN_MTU;
David S. Millerdefb3512010-12-08 21:16:57 -08001047 features |= RTAX_FEATURE_ALLFRAG;
1048 dst_metric_set(dst, RTAX_FEATURES, features);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001049 }
David S. Millerdefb3512010-12-08 21:16:57 -08001050 dst_metric_set(dst, RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001051 }
1052}
1053
David S. Miller0dbaee32010-12-13 12:52:14 -08001054static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055{
David S. Miller0dbaee32010-12-13 12:52:14 -08001056 struct net_device *dev = dst->dev;
1057 unsigned int mtu = dst_mtu(dst);
1058 struct net *net = dev_net(dev);
1059
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1061
Daniel Lezcano55786892008-03-04 13:47:47 -08001062 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1063 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064
1065 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001066 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1067 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1068 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069 * rely only on pmtu discovery"
1070 */
1071 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1072 mtu = IPV6_MAXPLEN;
1073 return mtu;
1074}
1075
Steffen Klassertebb762f2011-11-23 02:12:51 +00001076static unsigned int ip6_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08001077{
David S. Millerd33e4552010-12-14 13:01:14 -08001078 struct inet6_dev *idev;
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001079 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1080
1081 if (mtu)
1082 return mtu;
1083
1084 mtu = IPV6_MIN_MTU;
David S. Millerd33e4552010-12-14 13:01:14 -08001085
1086 rcu_read_lock();
1087 idev = __in6_dev_get(dst->dev);
1088 if (idev)
1089 mtu = idev->cnf.mtu6;
1090 rcu_read_unlock();
1091
1092 return mtu;
1093}
1094
/*
 * Head of a singly linked list (chained through dst->next) of dst entries
 * created by icmp6_dst_alloc(); walked and pruned by icmp6_dst_gc() and
 * icmp6_clean_all().  All accesses in this file take icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001097
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001098struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099 struct neighbour *neigh,
David S. Miller87a11572011-12-06 17:04:13 -05001100 struct flowi6 *fl6)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101{
David S. Miller87a11572011-12-06 17:04:13 -05001102 struct dst_entry *dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103 struct rt6_info *rt;
1104 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001105 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106
David S. Miller38308472011-12-03 18:02:47 -05001107 if (unlikely(!idev))
Eric Dumazet122bdf62012-03-14 21:13:11 +00001108 return ERR_PTR(-ENODEV);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109
David S. Miller957c6652011-06-24 15:25:00 -07001110 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
David S. Miller38308472011-12-03 18:02:47 -05001111 if (unlikely(!rt)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112 in6_dev_put(idev);
David S. Miller87a11572011-12-06 17:04:13 -05001113 dst = ERR_PTR(-ENOMEM);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 goto out;
1115 }
1116
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117 if (neigh)
1118 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -08001119 else {
David S. Millerf83c7792011-12-28 15:41:23 -05001120 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
David S. Millerb43faac2011-12-13 16:48:21 -05001121 if (IS_ERR(neigh)) {
RongQing.Li252c3d82012-01-12 22:33:46 +00001122 in6_dev_put(idev);
David S. Millerb43faac2011-12-13 16:48:21 -05001123 dst_free(&rt->dst);
1124 return ERR_CAST(neigh);
1125 }
David S. Miller14deae42009-01-04 16:04:39 -08001126 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001128 rt->dst.flags |= DST_HOST;
1129 rt->dst.output = ip6_output;
David S. Miller69cce1d2011-07-17 23:09:49 -07001130 dst_set_neighbour(&rt->dst, neigh);
Changli Gaod8d1f302010-06-10 23:31:35 -07001131 atomic_set(&rt->dst.__refcnt, 1);
David S. Miller87a11572011-12-06 17:04:13 -05001132 rt->rt6i_dst.addr = fl6->daddr;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001133 rt->rt6i_dst.plen = 128;
1134 rt->rt6i_idev = idev;
Gao feng70116872011-10-28 02:46:57 +00001135 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001137 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001138 rt->dst.next = icmp6_dst_gc_list;
1139 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001140 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141
Daniel Lezcano55786892008-03-04 13:47:47 -08001142 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143
David S. Miller87a11572011-12-06 17:04:13 -05001144 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1145
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146out:
David S. Miller87a11572011-12-06 17:04:13 -05001147 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001148}
1149
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001150int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151{
Hagen Paul Pfeifere9476e92011-02-25 05:45:19 +00001152 struct dst_entry *dst, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001153 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001155 spin_lock_bh(&icmp6_dst_lock);
1156 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001157
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158 while ((dst = *pprev) != NULL) {
1159 if (!atomic_read(&dst->__refcnt)) {
1160 *pprev = dst->next;
1161 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001162 } else {
1163 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001164 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165 }
1166 }
1167
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001168 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001169
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001170 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171}
1172
David S. Miller1e493d12008-09-10 17:27:15 -07001173static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1174 void *arg)
1175{
1176 struct dst_entry *dst, **pprev;
1177
1178 spin_lock_bh(&icmp6_dst_lock);
1179 pprev = &icmp6_dst_gc_list;
1180 while ((dst = *pprev) != NULL) {
1181 struct rt6_info *rt = (struct rt6_info *) dst;
1182 if (func(rt, arg)) {
1183 *pprev = dst->next;
1184 dst_free(dst);
1185 } else {
1186 pprev = &dst->next;
1187 }
1188 }
1189 spin_unlock_bh(&icmp6_dst_lock);
1190}
1191
Daniel Lezcano569d3642008-01-18 03:56:57 -08001192static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001195 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001196 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1197 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1198 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1199 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1200 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001201 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001202
Eric Dumazetfc66f952010-10-08 06:37:34 +00001203 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001204 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001205 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001206 goto out;
1207
Benjamin Thery6891a342008-03-04 13:49:47 -08001208 net->ipv6.ip6_rt_gc_expire++;
1209 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1210 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001211 entries = dst_entries_get_slow(ops);
1212 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001213 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001215 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001216 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217}
1218
1219/* Clean host part of a prefix. Not necessary in radix tree,
1220 but results in cleaner routing tables.
1221
1222 Remove it only when all the things will work!
1223 */
1224
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001225int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001226{
David S. Miller5170ae82010-12-12 21:35:57 -08001227 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
David S. Millera02e4b72010-12-12 21:39:02 -08001228 if (hoplimit == 0) {
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001229 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001230 struct inet6_dev *idev;
1231
1232 rcu_read_lock();
1233 idev = __in6_dev_get(dev);
1234 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001235 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001236 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001237 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001238 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239 }
1240 return hoplimit;
1241}
David S. Millerabbf46a2010-12-12 21:14:46 -08001242EXPORT_SYMBOL(ip6_dst_hoplimit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243
1244/*
1245 *
1246 */
1247
Thomas Graf86872cb2006-08-22 00:01:08 -07001248int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249{
1250 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001251 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001252 struct rt6_info *rt = NULL;
1253 struct net_device *dev = NULL;
1254 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001255 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256 int addr_type;
1257
Thomas Graf86872cb2006-08-22 00:01:08 -07001258 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259 return -EINVAL;
1260#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001261 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262 return -EINVAL;
1263#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001264 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001266 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267 if (!dev)
1268 goto out;
1269 idev = in6_dev_get(dev);
1270 if (!idev)
1271 goto out;
1272 }
1273
Thomas Graf86872cb2006-08-22 00:01:08 -07001274 if (cfg->fc_metric == 0)
1275 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001276
Matti Vaittinend71314b2011-11-14 00:14:49 +00001277 err = -ENOBUFS;
David S. Miller38308472011-12-03 18:02:47 -05001278 if (cfg->fc_nlinfo.nlh &&
1279 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
Matti Vaittinend71314b2011-11-14 00:14:49 +00001280 table = fib6_get_table(net, cfg->fc_table);
David S. Miller38308472011-12-03 18:02:47 -05001281 if (!table) {
Matti Vaittinend71314b2011-11-14 00:14:49 +00001282 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1283 table = fib6_new_table(net, cfg->fc_table);
1284 }
1285 } else {
1286 table = fib6_new_table(net, cfg->fc_table);
1287 }
David S. Miller38308472011-12-03 18:02:47 -05001288
1289 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001290 goto out;
Thomas Grafc71099a2006-08-04 23:20:06 -07001291
David S. Miller957c6652011-06-24 15:25:00 -07001292 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293
David S. Miller38308472011-12-03 18:02:47 -05001294 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 err = -ENOMEM;
1296 goto out;
1297 }
1298
Changli Gaod8d1f302010-06-10 23:31:35 -07001299 rt->dst.obsolete = -1;
Gao feng1716a962012-04-06 00:13:10 +00001300
1301 if (cfg->fc_flags & RTF_EXPIRES)
1302 rt6_set_expires(rt, jiffies +
1303 clock_t_to_jiffies(cfg->fc_expires));
1304 else
1305 rt6_clean_expires(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001306
Thomas Graf86872cb2006-08-22 00:01:08 -07001307 if (cfg->fc_protocol == RTPROT_UNSPEC)
1308 cfg->fc_protocol = RTPROT_BOOT;
1309 rt->rt6i_protocol = cfg->fc_protocol;
1310
1311 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001312
1313 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001314 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001315 else if (cfg->fc_flags & RTF_LOCAL)
1316 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001317 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001318 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001319
Changli Gaod8d1f302010-06-10 23:31:35 -07001320 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001321
Thomas Graf86872cb2006-08-22 00:01:08 -07001322 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1323 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324 if (rt->rt6i_dst.plen == 128)
David S. Miller11d53b42011-06-24 15:23:34 -07001325 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001327 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1328 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1329 if (!metrics) {
1330 err = -ENOMEM;
1331 goto out;
1332 }
1333 dst_init_metrics(&rt->dst, metrics, 0);
1334 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001335#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001336 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1337 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338#endif
1339
Thomas Graf86872cb2006-08-22 00:01:08 -07001340 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341
1342 /* We cannot add true routes via loopback here,
1343 they would result in kernel looping; promote them to reject routes
1344 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001345 if ((cfg->fc_flags & RTF_REJECT) ||
David S. Miller38308472011-12-03 18:02:47 -05001346 (dev && (dev->flags & IFF_LOOPBACK) &&
1347 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1348 !(cfg->fc_flags & RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001350 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001351 if (dev) {
1352 dev_put(dev);
1353 in6_dev_put(idev);
1354 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001355 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356 dev_hold(dev);
1357 idev = in6_dev_get(dev);
1358 if (!idev) {
1359 err = -ENODEV;
1360 goto out;
1361 }
1362 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001363 rt->dst.output = ip6_pkt_discard_out;
1364 rt->dst.input = ip6_pkt_discard;
1365 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001366 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1367 goto install_route;
1368 }
1369
Thomas Graf86872cb2006-08-22 00:01:08 -07001370 if (cfg->fc_flags & RTF_GATEWAY) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001371 const struct in6_addr *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372 int gwa_type;
1373
Thomas Graf86872cb2006-08-22 00:01:08 -07001374 gw_addr = &cfg->fc_gateway;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001375 rt->rt6i_gateway = *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376 gwa_type = ipv6_addr_type(gw_addr);
1377
1378 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1379 struct rt6_info *grt;
1380
1381 /* IPv6 strictly inhibits using not link-local
1382 addresses as nexthop address.
1383 Otherwise, router will not able to send redirects.
1384 It is very good, but in some (rare!) circumstances
1385 (SIT, PtP, NBMA NOARP links) it is handy to allow
1386 some exceptions. --ANK
1387 */
1388 err = -EINVAL;
David S. Miller38308472011-12-03 18:02:47 -05001389 if (!(gwa_type & IPV6_ADDR_UNICAST))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390 goto out;
1391
Daniel Lezcano55786892008-03-04 13:47:47 -08001392 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393
1394 err = -EHOSTUNREACH;
David S. Miller38308472011-12-03 18:02:47 -05001395 if (!grt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396 goto out;
1397 if (dev) {
David S. Millerd1918542011-12-28 20:19:20 -05001398 if (dev != grt->dst.dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001399 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001400 goto out;
1401 }
1402 } else {
David S. Millerd1918542011-12-28 20:19:20 -05001403 dev = grt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001404 idev = grt->rt6i_idev;
1405 dev_hold(dev);
1406 in6_dev_hold(grt->rt6i_idev);
1407 }
David S. Miller38308472011-12-03 18:02:47 -05001408 if (!(grt->rt6i_flags & RTF_GATEWAY))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001409 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001410 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411
1412 if (err)
1413 goto out;
1414 }
1415 err = -EINVAL;
David S. Miller38308472011-12-03 18:02:47 -05001416 if (!dev || (dev->flags & IFF_LOOPBACK))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417 goto out;
1418 }
1419
1420 err = -ENODEV;
David S. Miller38308472011-12-03 18:02:47 -05001421 if (!dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422 goto out;
1423
Daniel Walterc3968a82011-04-13 21:10:57 +00001424 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1425 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1426 err = -EINVAL;
1427 goto out;
1428 }
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001429 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
Daniel Walterc3968a82011-04-13 21:10:57 +00001430 rt->rt6i_prefsrc.plen = 128;
1431 } else
1432 rt->rt6i_prefsrc.plen = 0;
1433
Thomas Graf86872cb2006-08-22 00:01:08 -07001434 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
David S. Miller8ade06c2011-12-29 18:51:57 -05001435 err = rt6_bind_neighbour(rt, dev);
David S. Millerf83c7792011-12-28 15:41:23 -05001436 if (err)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001437 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001438 }
1439
Thomas Graf86872cb2006-08-22 00:01:08 -07001440 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001441
1442install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001443 if (cfg->fc_mx) {
1444 struct nlattr *nla;
1445 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001446
Thomas Graf86872cb2006-08-22 00:01:08 -07001447 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001448 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001449
1450 if (type) {
1451 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001452 err = -EINVAL;
1453 goto out;
1454 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001455
David S. Millerdefb3512010-12-08 21:16:57 -08001456 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001457 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001458 }
1459 }
1460
Changli Gaod8d1f302010-06-10 23:31:35 -07001461 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001462 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001463 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001464
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001465 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001466
Thomas Graf86872cb2006-08-22 00:01:08 -07001467 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001468
1469out:
1470 if (dev)
1471 dev_put(dev);
1472 if (idev)
1473 in6_dev_put(idev);
1474 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001475 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001476 return err;
1477}
1478
Thomas Graf86872cb2006-08-22 00:01:08 -07001479static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001480{
1481 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001482 struct fib6_table *table;
David S. Millerd1918542011-12-28 20:19:20 -05001483 struct net *net = dev_net(rt->dst.dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001485 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001486 return -ENOENT;
1487
Thomas Grafc71099a2006-08-04 23:20:06 -07001488 table = rt->rt6i_table;
1489 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001490
Thomas Graf86872cb2006-08-22 00:01:08 -07001491 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001492 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001493
Thomas Grafc71099a2006-08-04 23:20:06 -07001494 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495
1496 return err;
1497}
1498
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001499int ip6_del_rt(struct rt6_info *rt)
1500{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001501 struct nl_info info = {
David S. Millerd1918542011-12-28 20:19:20 -05001502 .nl_net = dev_net(rt->dst.dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001503 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001504 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001505}
1506
Thomas Graf86872cb2006-08-22 00:01:08 -07001507static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508{
Thomas Grafc71099a2006-08-04 23:20:06 -07001509 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001510 struct fib6_node *fn;
1511 struct rt6_info *rt;
1512 int err = -ESRCH;
1513
Daniel Lezcano55786892008-03-04 13:47:47 -08001514 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
David S. Miller38308472011-12-03 18:02:47 -05001515 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001516 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001517
Thomas Grafc71099a2006-08-04 23:20:06 -07001518 read_lock_bh(&table->tb6_lock);
1519
1520 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001521 &cfg->fc_dst, cfg->fc_dst_len,
1522 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001523
Linus Torvalds1da177e2005-04-16 15:20:36 -07001524 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001525 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001526 if (cfg->fc_ifindex &&
David S. Millerd1918542011-12-28 20:19:20 -05001527 (!rt->dst.dev ||
1528 rt->dst.dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001529 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001530 if (cfg->fc_flags & RTF_GATEWAY &&
1531 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001533 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001535 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001536 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001537
Thomas Graf86872cb2006-08-22 00:01:08 -07001538 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001539 }
1540 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001541 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001542
1543 return err;
1544}
1545
1546/*
1547 * Handle redirects
1548 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001549struct ip6rd_flowi {
David S. Miller4c9483b2011-03-12 16:22:43 -05001550 struct flowi6 fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001551 struct in6_addr gateway;
1552};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001553
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001554static struct rt6_info *__ip6_route_redirect(struct net *net,
1555 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -05001556 struct flowi6 *fl6,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001557 int flags)
1558{
David S. Miller4c9483b2011-03-12 16:22:43 -05001559 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001560 struct rt6_info *rt;
1561 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001562
Linus Torvalds1da177e2005-04-16 15:20:36 -07001563 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001564 * Get the "current" route for this destination and
1565 * check if the redirect has come from approriate router.
1566 *
1567 * RFC 2461 specifies that redirects should only be
1568 * accepted if they come from the nexthop to the target.
1569 * Due to the way the routes are chosen, this notion
1570 * is a bit fuzzy and one might need to check all possible
1571 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573
Thomas Grafc71099a2006-08-04 23:20:06 -07001574 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -05001575 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001576restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001577 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001578 /*
1579 * Current route is on-link; redirect is always invalid.
1580 *
1581 * Seems, previous statement is not true. It could
1582 * be node, which looks for us as on-link (f.e. proxy ndisc)
1583 * But then router serving it might decide, that we should
1584 * know truth 8)8) --ANK (980726).
1585 */
1586 if (rt6_check_expired(rt))
1587 continue;
1588 if (!(rt->rt6i_flags & RTF_GATEWAY))
1589 continue;
David S. Millerd1918542011-12-28 20:19:20 -05001590 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001591 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001592 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001593 continue;
1594 break;
1595 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001596
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001597 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001598 rt = net->ipv6.ip6_null_entry;
David S. Miller4c9483b2011-03-12 16:22:43 -05001599 BACKTRACK(net, &fl6->saddr);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001600out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001601 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001602
1603 read_unlock_bh(&table->tb6_lock);
1604
1605 return rt;
1606};
1607
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001608static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1609 const struct in6_addr *src,
1610 const struct in6_addr *gateway,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001611 struct net_device *dev)
1612{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001613 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001614 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001615 struct ip6rd_flowi rdfl = {
David S. Miller4c9483b2011-03-12 16:22:43 -05001616 .fl6 = {
1617 .flowi6_oif = dev->ifindex,
1618 .daddr = *dest,
1619 .saddr = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001620 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001621 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001622
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001623 rdfl.gateway = *gateway;
Brian Haley86c36ce2009-10-07 13:58:01 -07001624
Thomas Grafadaa70b2006-10-13 15:01:03 -07001625 if (rt6_need_strict(dest))
1626 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001627
David S. Miller4c9483b2011-03-12 16:22:43 -05001628 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001629 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001630}
1631
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001632void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1633 const struct in6_addr *saddr,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001634 struct neighbour *neigh, u8 *lladdr, int on_link)
1635{
1636 struct rt6_info *rt, *nrt = NULL;
1637 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001638 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001639
1640 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1641
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001642 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643 if (net_ratelimit())
1644 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1645 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001646 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647 }
1648
Linus Torvalds1da177e2005-04-16 15:20:36 -07001649 /*
1650 * We have finally decided to accept it.
1651 */
1652
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001653 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1655 NEIGH_UPDATE_F_OVERRIDE|
1656 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1657 NEIGH_UPDATE_F_ISROUTER))
1658 );
1659
1660 /*
1661 * Redirect received -> path was valid.
1662 * Look, redirects are sent only in response to data packets,
1663 * so that this nexthop apparently is reachable. --ANK
1664 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001665 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001666
1667 /* Duplicate redirect: silently ignore. */
David Miller27217452011-12-02 16:52:08 +00001668 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669 goto out;
1670
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001671 nrt = ip6_rt_copy(rt, dest);
David S. Miller38308472011-12-03 18:02:47 -05001672 if (!nrt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001673 goto out;
1674
1675 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1676 if (on_link)
1677 nrt->rt6i_flags &= ~RTF_GATEWAY;
1678
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001679 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
David S. Miller69cce1d2011-07-17 23:09:49 -07001680 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681
Thomas Graf40e22e82006-08-22 00:00:45 -07001682 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 goto out;
1684
Changli Gaod8d1f302010-06-10 23:31:35 -07001685 netevent.old = &rt->dst;
1686 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001687 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1688
David S. Miller38308472011-12-03 18:02:47 -05001689 if (rt->rt6i_flags & RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001690 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691 return;
1692 }
1693
1694out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001695 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696}
1697
1698/*
1699 * Handle ICMP "packet too big" messages
1700 * i.e. Path MTU discovery
1701 */
1702
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001703static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001704 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705{
1706 struct rt6_info *rt, *nrt;
1707 int allfrag = 0;
Andrey Vagind3052b52010-12-11 15:20:11 +00001708again:
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001709 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
David S. Miller38308472011-12-03 18:02:47 -05001710 if (!rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 return;
1712
Andrey Vagind3052b52010-12-11 15:20:11 +00001713 if (rt6_check_expired(rt)) {
1714 ip6_del_rt(rt);
1715 goto again;
1716 }
1717
Changli Gaod8d1f302010-06-10 23:31:35 -07001718 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001719 goto out;
1720
1721 if (pmtu < IPV6_MIN_MTU) {
1722 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001723 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001724 * MTU (1280) and a fragment header should always be included
1725 * after a node receiving Too Big message reporting PMTU is
1726 * less than the IPv6 Minimum Link MTU.
1727 */
1728 pmtu = IPV6_MIN_MTU;
1729 allfrag = 1;
1730 }
1731
1732 /* New mtu received -> path was valid.
1733 They are sent only in response to data packets,
1734 so that this nexthop apparently is reachable. --ANK
1735 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001736 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001737
1738 /* Host route. If it is static, it would be better
1739 not to override it, but add new one, so that
1740 when cache entry will expire old pmtu
1741 would return automatically.
1742 */
1743 if (rt->rt6i_flags & RTF_CACHE) {
David S. Millerdefb3512010-12-08 21:16:57 -08001744 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1745 if (allfrag) {
1746 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1747 features |= RTAX_FEATURE_ALLFRAG;
1748 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1749 }
Gao feng1716a962012-04-06 00:13:10 +00001750 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1751 rt->rt6i_flags |= RTF_MODIFIED;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001752 goto out;
1753 }
1754
1755 /* Network route.
1756 Two cases are possible:
1757 1. It is connected route. Action: COW
1758 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1759 */
David Miller27217452011-12-02 16:52:08 +00001760 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001761 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001762 else
1763 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001764
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001765 if (nrt) {
David S. Millerdefb3512010-12-08 21:16:57 -08001766 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1767 if (allfrag) {
1768 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1769 features |= RTAX_FEATURE_ALLFRAG;
1770 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1771 }
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001772
1773 /* According to RFC 1981, detecting PMTU increase shouldn't be
1774 * happened within 5 mins, the recommended timer is 10 mins.
1775 * Here this route expiration time is set to ip6_rt_mtu_expires
1776 * which is 10 mins. After 10 mins the decreased pmtu is expired
1777 * and detecting PMTU increase will be automatically happened.
1778 */
Gao feng1716a962012-04-06 00:13:10 +00001779 rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1780 nrt->rt6i_flags |= RTF_DYNAMIC;
Thomas Graf40e22e82006-08-22 00:00:45 -07001781 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001784 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001785}
1786
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001787void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001788 struct net_device *dev, u32 pmtu)
1789{
1790 struct net *net = dev_net(dev);
1791
1792 /*
1793 * RFC 1981 states that a node "MUST reduce the size of the packets it
1794 * is sending along the path" that caused the Packet Too Big message.
1795 * Since it's not possible in the general case to determine which
1796 * interface was used to send the original packet, we update the MTU
1797 * on the interface that will be used to send future packets. We also
1798 * update the MTU on the interface that received the Packet Too Big in
1799 * case the original packet was forced out that interface with
1800 * SO_BINDTODEVICE or similar. This is the next best thing to the
1801 * correct behaviour, which would be to update the MTU on all
1802 * interfaces.
1803 */
1804 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1805 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1806}
1807
Linus Torvalds1da177e2005-04-16 15:20:36 -07001808/*
1809 * Misc support functions
1810 */
1811
Gao feng1716a962012-04-06 00:13:10 +00001812static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001813 const struct in6_addr *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814{
David S. Millerd1918542011-12-28 20:19:20 -05001815 struct net *net = dev_net(ort->dst.dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001816 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07001817 ort->dst.dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001818
1819 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001820 rt->dst.input = ort->dst.input;
1821 rt->dst.output = ort->dst.output;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001822 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001824 rt->rt6i_dst.addr = *dest;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001825 rt->rt6i_dst.plen = 128;
David S. Millerdefb3512010-12-08 21:16:57 -08001826 dst_copy_metrics(&rt->dst, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -07001827 rt->dst.error = ort->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828 rt->rt6i_idev = ort->rt6i_idev;
1829 if (rt->rt6i_idev)
1830 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001831 rt->dst.lastuse = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001832
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001833 rt->rt6i_gateway = ort->rt6i_gateway;
Gao feng1716a962012-04-06 00:13:10 +00001834 rt->rt6i_flags = ort->rt6i_flags;
1835 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1836 (RTF_DEFAULT | RTF_ADDRCONF))
1837 rt6_set_from(rt, ort);
1838 else
1839 rt6_clean_expires(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840 rt->rt6i_metric = 0;
1841
Linus Torvalds1da177e2005-04-16 15:20:36 -07001842#ifdef CONFIG_IPV6_SUBTREES
1843 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1844#endif
Florian Westphal0f6c6392011-05-20 11:27:24 +00001845 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
Thomas Grafc71099a2006-08-04 23:20:06 -07001846 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001847 }
1848 return rt;
1849}
1850
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001851#ifdef CONFIG_IPV6_ROUTE_INFO
Lorenzo Colitti653be182014-03-26 19:35:41 +09001852static struct rt6_info *rt6_get_route_info(struct net_device *dev,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001853 const struct in6_addr *prefix, int prefixlen,
Lorenzo Colitti653be182014-03-26 19:35:41 +09001854 const struct in6_addr *gwaddr)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001855{
1856 struct fib6_node *fn;
1857 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001858 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001859
Lorenzo Colitti653be182014-03-26 19:35:41 +09001860 table = fib6_get_table(dev_net(dev),
1861 addrconf_rt_table(dev, RT6_TABLE_INFO));
David S. Miller38308472011-12-03 18:02:47 -05001862 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001863 return NULL;
1864
1865 write_lock_bh(&table->tb6_lock);
1866 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001867 if (!fn)
1868 goto out;
1869
Changli Gaod8d1f302010-06-10 23:31:35 -07001870 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Lorenzo Colitti653be182014-03-26 19:35:41 +09001871 if (rt->dst.dev->ifindex != dev->ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001872 continue;
1873 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1874 continue;
1875 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1876 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001877 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001878 break;
1879 }
1880out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001881 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001882 return rt;
1883}
1884
Lorenzo Colitti653be182014-03-26 19:35:41 +09001885static struct rt6_info *rt6_add_route_info(struct net_device *dev,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001886 const struct in6_addr *prefix, int prefixlen,
Lorenzo Colitti653be182014-03-26 19:35:41 +09001887 const struct in6_addr *gwaddr,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001888 unsigned pref)
1889{
Thomas Graf86872cb2006-08-22 00:01:08 -07001890 struct fib6_config cfg = {
Lorenzo Colitti653be182014-03-26 19:35:41 +09001891 .fc_table = addrconf_rt_table(dev, RT6_TABLE_INFO),
Rami Rosen238fc7e2008-02-09 23:43:11 -08001892 .fc_metric = IP6_RT_PRIO_USER,
Lorenzo Colitti653be182014-03-26 19:35:41 +09001893 .fc_ifindex = dev->ifindex,
Thomas Graf86872cb2006-08-22 00:01:08 -07001894 .fc_dst_len = prefixlen,
1895 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1896 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001897 .fc_nlinfo.pid = 0,
1898 .fc_nlinfo.nlh = NULL,
Lorenzo Colitti653be182014-03-26 19:35:41 +09001899 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001900 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001901
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001902 cfg.fc_dst = *prefix;
1903 cfg.fc_gateway = *gwaddr;
Thomas Graf86872cb2006-08-22 00:01:08 -07001904
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001905 /* We should treat it as a default route if prefix length is 0. */
1906 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001907 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001908
Thomas Graf86872cb2006-08-22 00:01:08 -07001909 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001910
Lorenzo Colitti653be182014-03-26 19:35:41 +09001911 return rt6_get_route_info(dev, prefix, prefixlen, gwaddr);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001912}
1913#endif
1914
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001915struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001916{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001917 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001918 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001919
Lorenzo Colitti653be182014-03-26 19:35:41 +09001920 table = fib6_get_table(dev_net(dev),
1921 addrconf_rt_table(dev, RT6_TABLE_MAIN));
David S. Miller38308472011-12-03 18:02:47 -05001922 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001923 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001924
Thomas Grafc71099a2006-08-04 23:20:06 -07001925 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001926 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
David S. Millerd1918542011-12-28 20:19:20 -05001927 if (dev == rt->dst.dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001928 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001929 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1930 break;
1931 }
1932 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001933 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001934 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001935 return rt;
1936}
1937
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001938struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001939 struct net_device *dev,
1940 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001941{
Thomas Graf86872cb2006-08-22 00:01:08 -07001942 struct fib6_config cfg = {
Lorenzo Colitti653be182014-03-26 19:35:41 +09001943 .fc_table = addrconf_rt_table(dev, RT6_TABLE_DFLT),
Rami Rosen238fc7e2008-02-09 23:43:11 -08001944 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001945 .fc_ifindex = dev->ifindex,
1946 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1947 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001948 .fc_nlinfo.pid = 0,
1949 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001950 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001951 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001952
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001953 cfg.fc_gateway = *gwaddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954
Thomas Graf86872cb2006-08-22 00:01:08 -07001955 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001956
Linus Torvalds1da177e2005-04-16 15:20:36 -07001957 return rt6_get_dflt_router(gwaddr, dev);
1958}
1959
Lorenzo Colitti653be182014-03-26 19:35:41 +09001960
1961int rt6_addrconf_purge(struct rt6_info *rt, void *arg) {
1962 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
1963 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2))
1964 return -1;
1965 return 0;
1966}
1967
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001968void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001969{
Lorenzo Colitti653be182014-03-26 19:35:41 +09001970 fib6_clean_all(net, rt6_addrconf_purge, 0, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001971}
1972
Daniel Lezcano55786892008-03-04 13:47:47 -08001973static void rtmsg_to_fib6_config(struct net *net,
1974 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001975 struct fib6_config *cfg)
1976{
1977 memset(cfg, 0, sizeof(*cfg));
1978
1979 cfg->fc_table = RT6_TABLE_MAIN;
1980 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1981 cfg->fc_metric = rtmsg->rtmsg_metric;
1982 cfg->fc_expires = rtmsg->rtmsg_info;
1983 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1984 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1985 cfg->fc_flags = rtmsg->rtmsg_flags;
1986
Daniel Lezcano55786892008-03-04 13:47:47 -08001987 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001988
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001989 cfg->fc_dst = rtmsg->rtmsg_dst;
1990 cfg->fc_src = rtmsg->rtmsg_src;
1991 cfg->fc_gateway = rtmsg->rtmsg_gateway;
Thomas Graf86872cb2006-08-22 00:01:08 -07001992}
1993
Daniel Lezcano55786892008-03-04 13:47:47 -08001994int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001995{
Thomas Graf86872cb2006-08-22 00:01:08 -07001996 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997 struct in6_rtmsg rtmsg;
1998 int err;
1999
2000 switch(cmd) {
2001 case SIOCADDRT: /* Add a route */
2002 case SIOCDELRT: /* Delete a route */
2003 if (!capable(CAP_NET_ADMIN))
2004 return -EPERM;
2005 err = copy_from_user(&rtmsg, arg,
2006 sizeof(struct in6_rtmsg));
2007 if (err)
2008 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07002009
Daniel Lezcano55786892008-03-04 13:47:47 -08002010 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07002011
Linus Torvalds1da177e2005-04-16 15:20:36 -07002012 rtnl_lock();
2013 switch (cmd) {
2014 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07002015 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002016 break;
2017 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07002018 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002019 break;
2020 default:
2021 err = -EINVAL;
2022 }
2023 rtnl_unlock();
2024
2025 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07002026 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002027
2028 return -EINVAL;
2029}
2030
2031/*
2032 * Drop the packet on the floor
2033 */
2034
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07002035static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002036{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002037 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00002038 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002039 switch (ipstats_mib_noroutes) {
2040 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07002041 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00002042 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07002043 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2044 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002045 break;
2046 }
2047 /* FALLTHROUGH */
2048 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07002049 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2050 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002051 break;
2052 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00002053 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002054 kfree_skb(skb);
2055 return 0;
2056}
2057
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002058static int ip6_pkt_discard(struct sk_buff *skb)
2059{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002060 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002061}
2062
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03002063static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064{
Eric Dumazetadf30902009-06-02 05:19:30 +00002065 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002066 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002067}
2068
David S. Miller6723ab52006-10-18 21:20:57 -07002069#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2070
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002071static int ip6_pkt_prohibit(struct sk_buff *skb)
2072{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002073 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002074}
2075
2076static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2077{
Eric Dumazetadf30902009-06-02 05:19:30 +00002078 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002079 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002080}
2081
David S. Miller6723ab52006-10-18 21:20:57 -07002082#endif
2083
Linus Torvalds1da177e2005-04-16 15:20:36 -07002084/*
2085 * Allocate a dst for local (unicast / anycast) address.
2086 */
2087
2088struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2089 const struct in6_addr *addr,
David S. Miller8f031512011-12-06 16:48:14 -05002090 bool anycast)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002091{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002092 struct net *net = dev_net(idev->dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002093 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07002094 net->loopback_dev, 0);
David S. Millerf83c7792011-12-28 15:41:23 -05002095 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002096
David S. Miller38308472011-12-03 18:02:47 -05002097 if (!rt) {
Ben Greear40385652010-11-08 12:33:48 +00002098 if (net_ratelimit())
2099 pr_warning("IPv6: Maximum number of routes reached,"
2100 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002101 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00002102 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002103
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104 in6_dev_hold(idev);
2105
David S. Miller11d53b42011-06-24 15:23:34 -07002106 rt->dst.flags |= DST_HOST;
Changli Gaod8d1f302010-06-10 23:31:35 -07002107 rt->dst.input = ip6_input;
2108 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002109 rt->rt6i_idev = idev;
Changli Gaod8d1f302010-06-10 23:31:35 -07002110 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002111
2112 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09002113 if (anycast)
2114 rt->rt6i_flags |= RTF_ANYCAST;
2115 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07002116 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller8ade06c2011-12-29 18:51:57 -05002117 err = rt6_bind_neighbour(rt, rt->dst.dev);
David S. Millerf83c7792011-12-28 15:41:23 -05002118 if (err) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002119 dst_free(&rt->dst);
David S. Millerf83c7792011-12-28 15:41:23 -05002120 return ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002121 }
2122
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002123 rt->rt6i_dst.addr = *addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002124 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08002125 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002126
Changli Gaod8d1f302010-06-10 23:31:35 -07002127 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002128
2129 return rt;
2130}
2131
Daniel Walterc3968a82011-04-13 21:10:57 +00002132int ip6_route_get_saddr(struct net *net,
2133 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00002134 const struct in6_addr *daddr,
Daniel Walterc3968a82011-04-13 21:10:57 +00002135 unsigned int prefs,
2136 struct in6_addr *saddr)
2137{
2138 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2139 int err = 0;
2140 if (rt->rt6i_prefsrc.plen)
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002141 *saddr = rt->rt6i_prefsrc.addr;
Daniel Walterc3968a82011-04-13 21:10:57 +00002142 else
2143 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2144 daddr, prefs, saddr);
2145 return err;
2146}
2147
/*
 * Remove a deleted IP from prefsrc entries: argument bundle carrying the
 * device, namespace and address involved.
 */
struct arg_dev_net_ip {
	struct net_device	*dev;
	struct net		*net;
	struct in6_addr		*addr;
};
2154
2155static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2156{
2157 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2158 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2159 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2160
David S. Millerd1918542011-12-28 20:19:20 -05002161 if (((void *)rt->dst.dev == dev || !dev) &&
Daniel Walterc3968a82011-04-13 21:10:57 +00002162 rt != net->ipv6.ip6_null_entry &&
2163 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2164 /* remove prefsrc entry */
2165 rt->rt6i_prefsrc.plen = 0;
2166 }
2167 return 0;
2168}
2169
2170void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2171{
2172 struct net *net = dev_net(ifp->idev->dev);
2173 struct arg_dev_net_ip adni = {
2174 .dev = ifp->idev->dev,
2175 .net = net,
2176 .addr = &ifp->addr,
2177 };
2178 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2179}
2180
/* Device / namespace pair passed as the callback argument to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;
	struct net		*net;
};
2185
Linus Torvalds1da177e2005-04-16 15:20:36 -07002186static int fib6_ifdown(struct rt6_info *rt, void *arg)
2187{
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002188 const struct arg_dev_net *adn = arg;
2189 const struct net_device *dev = adn->dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002190
David S. Millerd1918542011-12-28 20:19:20 -05002191 if ((rt->dst.dev == dev || !dev) &&
David S. Millerc159d302011-12-26 15:24:36 -05002192 rt != adn->net->ipv6.ip6_null_entry)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002193 return -1;
David S. Millerc159d302011-12-26 15:24:36 -05002194
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195 return 0;
2196}
2197
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002198void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002199{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002200 struct arg_dev_net adn = {
2201 .dev = dev,
2202 .net = net,
2203 };
2204
2205 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002206 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002207}
2208
/* Callback argument for rt6_mtu_change_route(): a device and its new MTU. */
struct rt6_mtu_change_arg {
	struct net_device	*dev;
	unsigned		mtu;
};
2214
2215static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2216{
2217 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2218 struct inet6_dev *idev;
2219
2220 /* In IPv6 pmtu discovery is not optional,
2221 so that RTAX_MTU lock cannot disable it.
2222 We still use this lock to block changes
2223 caused by addrconf/ndisc.
2224 */
2225
2226 idev = __in6_dev_get(arg->dev);
David S. Miller38308472011-12-03 18:02:47 -05002227 if (!idev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228 return 0;
2229
2230 /* For administrative MTU increase, there is no way to discover
2231 IPv6 PMTU increase, so PMTU increase should be updated here.
2232 Since RFC 1981 doesn't include administrative MTU increase
2233 update PMTU increase is a MUST. (i.e. jumbo frame)
2234 */
2235 /*
2236 If new MTU is less than route PMTU, this new MTU will be the
2237 lowest MTU in the path, update the route PMTU to reflect PMTU
2238 decreases; if new MTU is greater than route PMTU, and the
2239 old MTU is the lowest MTU in the path, update the route PMTU
2240 to reflect the increase. In this case if the other nodes' MTU
2241 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2242 PMTU discouvery.
2243 */
David S. Millerd1918542011-12-28 20:19:20 -05002244 if (rt->dst.dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002245 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2246 (dst_mtu(&rt->dst) >= arg->mtu ||
2247 (dst_mtu(&rt->dst) < arg->mtu &&
2248 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
David S. Millerdefb3512010-12-08 21:16:57 -08002249 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002250 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002251 return 0;
2252}
2253
2254void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2255{
Thomas Grafc71099a2006-08-04 23:20:06 -07002256 struct rt6_mtu_change_arg arg = {
2257 .dev = dev,
2258 .mtu = mtu,
2259 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002260
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002261 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262}
2263
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002264static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002265 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002266 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002267 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002268 [RTA_PRIORITY] = { .type = NLA_U32 },
2269 [RTA_METRICS] = { .type = NLA_NESTED },
2270};
2271
2272static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2273 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274{
Thomas Graf86872cb2006-08-22 00:01:08 -07002275 struct rtmsg *rtm;
2276 struct nlattr *tb[RTA_MAX+1];
2277 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278
Thomas Graf86872cb2006-08-22 00:01:08 -07002279 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2280 if (err < 0)
2281 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002282
Thomas Graf86872cb2006-08-22 00:01:08 -07002283 err = -EINVAL;
2284 rtm = nlmsg_data(nlh);
2285 memset(cfg, 0, sizeof(*cfg));
2286
2287 cfg->fc_table = rtm->rtm_table;
2288 cfg->fc_dst_len = rtm->rtm_dst_len;
2289 cfg->fc_src_len = rtm->rtm_src_len;
2290 cfg->fc_flags = RTF_UP;
2291 cfg->fc_protocol = rtm->rtm_protocol;
2292
2293 if (rtm->rtm_type == RTN_UNREACHABLE)
2294 cfg->fc_flags |= RTF_REJECT;
2295
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002296 if (rtm->rtm_type == RTN_LOCAL)
2297 cfg->fc_flags |= RTF_LOCAL;
2298
Thomas Graf86872cb2006-08-22 00:01:08 -07002299 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2300 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002301 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002302
2303 if (tb[RTA_GATEWAY]) {
2304 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2305 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002306 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002307
2308 if (tb[RTA_DST]) {
2309 int plen = (rtm->rtm_dst_len + 7) >> 3;
2310
2311 if (nla_len(tb[RTA_DST]) < plen)
2312 goto errout;
2313
2314 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002315 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002316
2317 if (tb[RTA_SRC]) {
2318 int plen = (rtm->rtm_src_len + 7) >> 3;
2319
2320 if (nla_len(tb[RTA_SRC]) < plen)
2321 goto errout;
2322
2323 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002324 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002325
Daniel Walterc3968a82011-04-13 21:10:57 +00002326 if (tb[RTA_PREFSRC])
2327 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2328
Thomas Graf86872cb2006-08-22 00:01:08 -07002329 if (tb[RTA_OIF])
2330 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2331
2332 if (tb[RTA_PRIORITY])
2333 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2334
2335 if (tb[RTA_METRICS]) {
2336 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2337 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002338 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002339
2340 if (tb[RTA_TABLE])
2341 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2342
2343 err = 0;
2344errout:
2345 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002346}
2347
Thomas Grafc127ea22007-03-22 11:58:32 -07002348static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002349{
Thomas Graf86872cb2006-08-22 00:01:08 -07002350 struct fib6_config cfg;
2351 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352
Thomas Graf86872cb2006-08-22 00:01:08 -07002353 err = rtm_to_fib6_config(skb, nlh, &cfg);
2354 if (err < 0)
2355 return err;
2356
2357 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002358}
2359
Thomas Grafc127ea22007-03-22 11:58:32 -07002360static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002361{
Thomas Graf86872cb2006-08-22 00:01:08 -07002362 struct fib6_config cfg;
2363 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002364
Thomas Graf86872cb2006-08-22 00:01:08 -07002365 err = rtm_to_fib6_config(skb, nlh, &cfg);
2366 if (err < 0)
2367 return err;
2368
2369 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002370}
2371
Thomas Graf339bf982006-11-10 14:10:15 -08002372static inline size_t rt6_nlmsg_size(void)
2373{
2374 return NLMSG_ALIGN(sizeof(struct rtmsg))
2375 + nla_total_size(16) /* RTA_SRC */
2376 + nla_total_size(16) /* RTA_DST */
2377 + nla_total_size(16) /* RTA_GATEWAY */
2378 + nla_total_size(16) /* RTA_PREFSRC */
2379 + nla_total_size(4) /* RTA_TABLE */
2380 + nla_total_size(4) /* RTA_IIF */
2381 + nla_total_size(4) /* RTA_OIF */
2382 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002383 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002384 + nla_total_size(sizeof(struct rta_cacheinfo));
2385}
2386
Brian Haley191cd582008-08-14 15:33:21 -07002387static int rt6_fill_node(struct net *net,
2388 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002389 struct in6_addr *dst, struct in6_addr *src,
2390 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002391 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002392{
David S. Miller346f8702011-12-29 15:22:33 -05002393 const struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002394 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002395 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002396 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002397 u32 table;
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002398 struct neighbour *n;
David S. Miller346f8702011-12-29 15:22:33 -05002399 u32 ts, tsage;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002400
2401 if (prefix) { /* user wants prefix routes only */
2402 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2403 /* success since this is not a prefix route */
2404 return 1;
2405 }
2406 }
2407
Thomas Graf2d7202b2006-08-22 00:01:27 -07002408 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
David S. Miller38308472011-12-03 18:02:47 -05002409 if (!nlh)
Patrick McHardy26932562007-01-31 23:16:40 -08002410 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002411
2412 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002413 rtm->rtm_family = AF_INET6;
2414 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2415 rtm->rtm_src_len = rt->rt6i_src.plen;
2416 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002417 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002418 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002419 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002420 table = RT6_TABLE_UNSPEC;
2421 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002422 NLA_PUT_U32(skb, RTA_TABLE, table);
David S. Miller38308472011-12-03 18:02:47 -05002423 if (rt->rt6i_flags & RTF_REJECT)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002424 rtm->rtm_type = RTN_UNREACHABLE;
David S. Miller38308472011-12-03 18:02:47 -05002425 else if (rt->rt6i_flags & RTF_LOCAL)
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002426 rtm->rtm_type = RTN_LOCAL;
David S. Millerd1918542011-12-28 20:19:20 -05002427 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002428 rtm->rtm_type = RTN_LOCAL;
2429 else
2430 rtm->rtm_type = RTN_UNICAST;
2431 rtm->rtm_flags = 0;
2432 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2433 rtm->rtm_protocol = rt->rt6i_protocol;
David S. Miller38308472011-12-03 18:02:47 -05002434 if (rt->rt6i_flags & RTF_DYNAMIC)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002435 rtm->rtm_protocol = RTPROT_REDIRECT;
2436 else if (rt->rt6i_flags & RTF_ADDRCONF)
2437 rtm->rtm_protocol = RTPROT_KERNEL;
David S. Miller38308472011-12-03 18:02:47 -05002438 else if (rt->rt6i_flags & RTF_DEFAULT)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002439 rtm->rtm_protocol = RTPROT_RA;
2440
David S. Miller38308472011-12-03 18:02:47 -05002441 if (rt->rt6i_flags & RTF_CACHE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002442 rtm->rtm_flags |= RTM_F_CLONED;
2443
2444 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002445 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002446 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002447 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002448 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002449#ifdef CONFIG_IPV6_SUBTREES
2450 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002451 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002452 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002453 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002454 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002455#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002456 if (iif) {
2457#ifdef CONFIG_IPV6_MROUTE
2458 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002459 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002460 if (err <= 0) {
2461 if (!nowait) {
2462 if (err == 0)
2463 return 0;
2464 goto nla_put_failure;
2465 } else {
2466 if (err == -EMSGSIZE)
2467 goto nla_put_failure;
2468 }
2469 }
2470 } else
2471#endif
2472 NLA_PUT_U32(skb, RTA_IIF, iif);
2473 } else if (dst) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002474 struct in6_addr saddr_buf;
Daniel Walterc3968a82011-04-13 21:10:57 +00002475 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002476 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002477 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002478
Daniel Walterc3968a82011-04-13 21:10:57 +00002479 if (rt->rt6i_prefsrc.plen) {
2480 struct in6_addr saddr_buf;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002481 saddr_buf = rt->rt6i_prefsrc.addr;
Daniel Walterc3968a82011-04-13 21:10:57 +00002482 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2483 }
2484
David S. Millerdefb3512010-12-08 21:16:57 -08002485 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002486 goto nla_put_failure;
2487
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002488 rcu_read_lock();
David Miller27217452011-12-02 16:52:08 +00002489 n = dst_get_neighbour_noref(&rt->dst);
Eric Dumazet94f826b2012-03-27 09:53:52 +00002490 if (n) {
2491 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2492 rcu_read_unlock();
2493 goto nla_put_failure;
2494 }
2495 }
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002496 rcu_read_unlock();
Thomas Graf2d7202b2006-08-22 00:01:27 -07002497
Changli Gaod8d1f302010-06-10 23:31:35 -07002498 if (rt->dst.dev)
David S. Millerd1918542011-12-28 20:19:20 -05002499 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
Thomas Graf2d7202b2006-08-22 00:01:27 -07002500
2501 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002502
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002503 if (!(rt->rt6i_flags & RTF_EXPIRES))
2504 expires = 0;
David S. Millerd1918542011-12-28 20:19:20 -05002505 else if (rt->dst.expires - jiffies < INT_MAX)
2506 expires = rt->dst.expires - jiffies;
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002507 else
2508 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002509
David S. Miller346f8702011-12-29 15:22:33 -05002510 peer = rt->rt6i_peer;
2511 ts = tsage = 0;
2512 if (peer && peer->tcp_ts_stamp) {
2513 ts = peer->tcp_ts;
2514 tsage = get_seconds() - peer->tcp_ts_stamp;
2515 }
2516
2517 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
Changli Gaod8d1f302010-06-10 23:31:35 -07002518 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002519 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002520
Thomas Graf2d7202b2006-08-22 00:01:27 -07002521 return nlmsg_end(skb, nlh);
2522
2523nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002524 nlmsg_cancel(skb, nlh);
2525 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002526}
2527
Patrick McHardy1b43af52006-08-10 23:11:17 -07002528int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002529{
2530 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2531 int prefix;
2532
Thomas Graf2d7202b2006-08-22 00:01:27 -07002533 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2534 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002535 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2536 } else
2537 prefix = 0;
2538
Brian Haley191cd582008-08-14 15:33:21 -07002539 return rt6_fill_node(arg->net,
2540 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002541 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002542 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543}
2544
Thomas Grafc127ea22007-03-22 11:58:32 -07002545static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002546{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002547 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002548 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002549 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002550 struct sk_buff *skb;
2551 struct rtmsg *rtm;
David S. Miller4c9483b2011-03-12 16:22:43 -05002552 struct flowi6 fl6;
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00002553 int err, iif = 0, oif = 0;
Thomas Grafab364a62006-08-22 00:01:47 -07002554
2555 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2556 if (err < 0)
2557 goto errout;
2558
2559 err = -EINVAL;
David S. Miller4c9483b2011-03-12 16:22:43 -05002560 memset(&fl6, 0, sizeof(fl6));
Thomas Grafab364a62006-08-22 00:01:47 -07002561
2562 if (tb[RTA_SRC]) {
2563 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2564 goto errout;
2565
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002566 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
Thomas Grafab364a62006-08-22 00:01:47 -07002567 }
2568
2569 if (tb[RTA_DST]) {
2570 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2571 goto errout;
2572
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002573 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
Thomas Grafab364a62006-08-22 00:01:47 -07002574 }
2575
2576 if (tb[RTA_IIF])
2577 iif = nla_get_u32(tb[RTA_IIF]);
2578
2579 if (tb[RTA_OIF])
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00002580 oif = nla_get_u32(tb[RTA_OIF]);
Thomas Grafab364a62006-08-22 00:01:47 -07002581
2582 if (iif) {
2583 struct net_device *dev;
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00002584 int flags = 0;
2585
Daniel Lezcano55786892008-03-04 13:47:47 -08002586 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002587 if (!dev) {
2588 err = -ENODEV;
2589 goto errout;
2590 }
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00002591
2592 fl6.flowi6_iif = iif;
2593
2594 if (!ipv6_addr_any(&fl6.saddr))
2595 flags |= RT6_LOOKUP_F_HAS_SADDR;
2596
2597 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2598 flags);
2599 } else {
2600 fl6.flowi6_oif = oif;
2601
2602 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
Thomas Grafab364a62006-08-22 00:01:47 -07002603 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002604
2605 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
David S. Miller38308472011-12-03 18:02:47 -05002606 if (!skb) {
Thomas Grafab364a62006-08-22 00:01:47 -07002607 err = -ENOBUFS;
2608 goto errout;
2609 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002610
2611 /* Reserve room for dummy headers, this skb can pass
2612 through good chunk of routing engine.
2613 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002614 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002615 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2616
Changli Gaod8d1f302010-06-10 23:31:35 -07002617 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002618
David S. Miller4c9483b2011-03-12 16:22:43 -05002619 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002620 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002621 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002622 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002623 kfree_skb(skb);
2624 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002625 }
2626
Daniel Lezcano55786892008-03-04 13:47:47 -08002627 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002628errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002629 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002630}
2631
Thomas Graf86872cb2006-08-22 00:01:08 -07002632void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002633{
2634 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002635 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002636 u32 seq;
2637 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002638
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002639 err = -ENOBUFS;
David S. Miller38308472011-12-03 18:02:47 -05002640 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002641
Thomas Graf339bf982006-11-10 14:10:15 -08002642 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
David S. Miller38308472011-12-03 18:02:47 -05002643 if (!skb)
Thomas Graf21713eb2006-08-15 00:35:24 -07002644 goto errout;
2645
Brian Haley191cd582008-08-14 15:33:21 -07002646 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002647 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002648 if (err < 0) {
2649 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2650 WARN_ON(err == -EMSGSIZE);
2651 kfree_skb(skb);
2652 goto errout;
2653 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002654 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2655 info->nlh, gfp_any());
2656 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002657errout:
2658 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002659 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002660}
2661
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002662static int ip6_route_dev_notify(struct notifier_block *this,
2663 unsigned long event, void *data)
2664{
2665 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002666 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002667
2668 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002669 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002670 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2671#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002672 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002673 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002674 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002675 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2676#endif
2677 }
2678
2679 return NOTIFY_OK;
2680}
2681
Linus Torvalds1da177e2005-04-16 15:20:36 -07002682/*
2683 * /proc
2684 */
2685
2686#ifdef CONFIG_PROC_FS
2687
/*
 * Buffer-walk bookkeeping for /proc output.
 * NOTE(review): no user of this struct is visible in this part of the
 * file; the seq_file based handlers below do not reference it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2696
2697static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2698{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002699 struct seq_file *m = p_arg;
David S. Miller69cce1d2011-07-17 23:09:49 -07002700 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002701
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002702 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002703
2704#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002705 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002706#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002707 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002708#endif
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002709 rcu_read_lock();
David Miller27217452011-12-02 16:52:08 +00002710 n = dst_get_neighbour_noref(&rt->dst);
David S. Miller69cce1d2011-07-17 23:09:49 -07002711 if (n) {
2712 seq_printf(m, "%pi6", n->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002713 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002714 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002715 }
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002716 rcu_read_unlock();
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002717 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002718 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2719 rt->dst.__use, rt->rt6i_flags,
David S. Millerd1918542011-12-28 20:19:20 -05002720 rt->dst.dev ? rt->dst.dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002721 return 0;
2722}
2723
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002724static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002725{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002726 struct net *net = (struct net *)m->private;
Josh Hunt32b293a2011-12-28 13:23:07 +00002727 fib6_clean_all_ro(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002728 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002729}
2730
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002731static int ipv6_route_open(struct inode *inode, struct file *file)
2732{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002733 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002734}
2735
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002736static const struct file_operations ipv6_route_proc_fops = {
2737 .owner = THIS_MODULE,
2738 .open = ipv6_route_open,
2739 .read = seq_read,
2740 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002741 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002742};
2743
Linus Torvalds1da177e2005-04-16 15:20:36 -07002744static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2745{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002746 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002747 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002748 net->ipv6.rt6_stats->fib_nodes,
2749 net->ipv6.rt6_stats->fib_route_nodes,
2750 net->ipv6.rt6_stats->fib_rt_alloc,
2751 net->ipv6.rt6_stats->fib_rt_entries,
2752 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002753 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002754 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002755
2756 return 0;
2757}
2758
2759static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2760{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002761 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002762}
2763
Arjan van de Ven9a321442007-02-12 00:55:35 -08002764static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002765 .owner = THIS_MODULE,
2766 .open = rt6_stats_seq_open,
2767 .read = seq_read,
2768 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002769 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002770};
2771#endif /* CONFIG_PROC_FS */
2772
2773#ifdef CONFIG_SYSCTL
2774
Linus Torvalds1da177e2005-04-16 15:20:36 -07002775static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002776int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002777 void __user *buffer, size_t *lenp, loff_t *ppos)
2778{
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002779 struct net *net;
2780 int delay;
2781 if (!write)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002782 return -EINVAL;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002783
2784 net = (struct net *)ctl->extra1;
2785 delay = net->ipv6.sysctl.flush_delay;
2786 proc_dointvec(ctl, write, buffer, lenp, ppos);
2787 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2788 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002789}
2790
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002791ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002792 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002793 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002794 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002795 .maxlen = sizeof(int),
Dave Jones89c8b3a2005-04-28 12:11:49 -07002796 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002797 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002798 },
2799 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002800 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002801 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002802 .maxlen = sizeof(int),
2803 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002804 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002805 },
2806 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002807 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002808 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002809 .maxlen = sizeof(int),
2810 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002811 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002812 },
2813 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002814 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002815 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002816 .maxlen = sizeof(int),
2817 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002818 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002819 },
2820 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002821 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002822 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002823 .maxlen = sizeof(int),
2824 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002825 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002826 },
2827 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002828 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002829 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002830 .maxlen = sizeof(int),
2831 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002832 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002833 },
2834 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002835 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002836 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002837 .maxlen = sizeof(int),
2838 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002839 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002840 },
2841 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002842 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002843 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002844 .maxlen = sizeof(int),
2845 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002846 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002847 },
2848 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002849 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002850 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002851 .maxlen = sizeof(int),
2852 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002853 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002854 },
2855 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002856 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002857 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002858 .maxlen = sizeof(int),
2859 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002860 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002861 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002862 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002863};
2864
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002865struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002866{
2867 struct ctl_table *table;
2868
2869 table = kmemdup(ipv6_route_table_template,
2870 sizeof(ipv6_route_table_template),
2871 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002872
2873 if (table) {
2874 table[0].data = &net->ipv6.sysctl.flush_delay;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002875 table[0].extra1 = net;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002876 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002877 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2878 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2879 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2880 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2881 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2882 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2883 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002884 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002885 }
2886
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002887 return table;
2888}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002889#endif
2890
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002891static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002892{
Pavel Emelyanov633d4242008-04-21 14:25:23 -07002893 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002894
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002895 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2896 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002897
Eric Dumazetfc66f952010-10-08 06:37:34 +00002898 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2899 goto out_ip6_dst_ops;
2900
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002901 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2902 sizeof(*net->ipv6.ip6_null_entry),
2903 GFP_KERNEL);
2904 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002905 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002906 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002907 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002908 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002909 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2910 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002911
2912#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2913 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2914 sizeof(*net->ipv6.ip6_prohibit_entry),
2915 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002916 if (!net->ipv6.ip6_prohibit_entry)
2917 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002918 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002919 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002920 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002921 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2922 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002923
2924 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2925 sizeof(*net->ipv6.ip6_blk_hole_entry),
2926 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002927 if (!net->ipv6.ip6_blk_hole_entry)
2928 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002929 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002930 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002931 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002932 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2933 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002934#endif
2935
Peter Zijlstrab339a472008-10-07 14:15:00 -07002936 net->ipv6.sysctl.flush_delay = 0;
2937 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2938 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2939 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2940 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2941 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2942 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2943 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2944
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002945#ifdef CONFIG_PROC_FS
2946 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2947 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2948#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002949 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2950
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002951 ret = 0;
2952out:
2953 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002954
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002955#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2956out_ip6_prohibit_entry:
2957 kfree(net->ipv6.ip6_prohibit_entry);
2958out_ip6_null_entry:
2959 kfree(net->ipv6.ip6_null_entry);
2960#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002961out_ip6_dst_entries:
2962 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002963out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002964 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002965}
2966
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002967static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002968{
2969#ifdef CONFIG_PROC_FS
2970 proc_net_remove(net, "ipv6_route");
2971 proc_net_remove(net, "rt6_stats");
2972#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002973 kfree(net->ipv6.ip6_null_entry);
2974#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2975 kfree(net->ipv6.ip6_prohibit_entry);
2976 kfree(net->ipv6.ip6_blk_hole_entry);
2977#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002978 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002979}
2980
2981static struct pernet_operations ip6_route_net_ops = {
2982 .init = ip6_route_net_init,
2983 .exit = ip6_route_net_exit,
2984};
2985
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002986static struct notifier_block ip6_route_dev_notifier = {
2987 .notifier_call = ip6_route_dev_notify,
2988 .priority = 0,
2989};
2990
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002991int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002992{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002993 int ret;
2994
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002995 ret = -ENOMEM;
2996 ip6_dst_ops_template.kmem_cachep =
2997 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2998 SLAB_HWCACHE_ALIGN, NULL);
2999 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08003000 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07003001
Eric Dumazetfc66f952010-10-08 06:37:34 +00003002 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003003 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08003004 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08003005
Eric Dumazetfc66f952010-10-08 06:37:34 +00003006 ret = register_pernet_subsys(&ip6_route_net_ops);
3007 if (ret)
3008 goto out_dst_entries;
3009
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07003010 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3011
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003012 /* Registering of the loopback is done before this portion of code,
3013 * the loopback reference in rt6_info will not be taken, do it
3014 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07003015 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003016 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3017 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07003018 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003019 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07003020 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003021 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3022 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003023 ret = fib6_init();
3024 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003025 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003026
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003027 ret = xfrm6_init();
3028 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08003029 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08003030
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003031 ret = fib6_rules_init();
3032 if (ret)
3033 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08003034
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003035 ret = -ENOBUFS;
Greg Rosec7ac8672011-06-10 01:27:09 +00003036 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3037 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3038 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003039 goto fib6_rules_init;
3040
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003041 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08003042 if (ret)
3043 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003044
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003045out:
3046 return ret;
3047
3048fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003049 fib6_rules_cleanup();
3050xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003051 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003052out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003053 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003054out_register_subsys:
3055 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00003056out_dst_entries:
3057 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003058out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08003059 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003060 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003061}
3062
3063void ip6_route_cleanup(void)
3064{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003065 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07003066 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003067 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003068 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003069 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00003070 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08003071 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003072}