blob: c035251beb070bb7f4220324a90e0ffeee8e103c [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * ROUTE - implementation of the IP router.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
12 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 *
14 * Fixes:
15 * Alan Cox : Verify area fixes.
16 * Alan Cox : cli() protects routing changes
17 * Rui Oliveira : ICMP routing table updates
18 * (rco@di.uminho.pt) Routing table insertion and update
19 * Linus Torvalds : Rewrote bits to be sensible
20 * Alan Cox : Added BSD route gw semantics
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090021 * Alan Cox : Super /proc >4K
Linus Torvalds1da177e2005-04-16 15:20:36 -070022 * Alan Cox : MTU in route table
23 * Alan Cox : MSS actually. Also added the window
24 * clamper.
25 * Sam Lantinga : Fixed route matching in rt_del()
26 * Alan Cox : Routing cache support.
27 * Alan Cox : Removed compatibility cruft.
28 * Alan Cox : RTF_REJECT support.
29 * Alan Cox : TCP irtt support.
30 * Jonathan Naylor : Added Metric support.
31 * Miquel van Smoorenburg : BSD API fixes.
32 * Miquel van Smoorenburg : Metrics.
33 * Alan Cox : Use __u32 properly
34 * Alan Cox : Aligned routing errors more closely with BSD
35 * our system is still very different.
36 * Alan Cox : Faster /proc handling
37 * Alexey Kuznetsov : Massive rework to support tree based routing,
38 * routing caches and better behaviour.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090039 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 * Olaf Erb : irtt wasn't being copied right.
41 * Bjorn Ekwall : Kerneld route support.
42 * Alan Cox : Multicast fixed (I hope)
43 * Pavel Krauz : Limited broadcast fixed
44 * Mike McLagan : Routing by source
45 * Alexey Kuznetsov : End of old history. Split to fib.c and
46 * route.c and rewritten from scratch.
47 * Andi Kleen : Load-limit warning messages.
48 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
49 * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
50 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
51 * Vladimir V. Ivanov : IP rule info (flowid) is really useful.
52 * Marc Boucher : routing by fwmark
53 * Robert Olsson : Added rt_cache statistics
54 * Arnaldo C. Melo : Convert proc stuff to seq_file
Eric Dumazetbb1d23b2005-07-05 15:00:32 -070055 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
Ilia Sotnikovcef26852006-03-25 01:38:55 -080056 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
57 * Ilia Sotnikov : Removed TOS from hash calculations
Linus Torvalds1da177e2005-04-16 15:20:36 -070058 *
59 * This program is free software; you can redistribute it and/or
60 * modify it under the terms of the GNU General Public License
61 * as published by the Free Software Foundation; either version
62 * 2 of the License, or (at your option) any later version.
63 */
64
Joe Perchesafd465032012-03-12 07:03:32 +000065#define pr_fmt(fmt) "IPv4: " fmt
66
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <linux/module.h>
68#include <asm/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <linux/bitops.h>
70#include <linux/types.h>
71#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#include <linux/mm.h>
Eric Dumazet424c4b72005-07-05 14:58:19 -070073#include <linux/bootmem.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070074#include <linux/string.h>
75#include <linux/socket.h>
76#include <linux/sockios.h>
77#include <linux/errno.h>
78#include <linux/in.h>
79#include <linux/inet.h>
80#include <linux/netdevice.h>
81#include <linux/proc_fs.h>
82#include <linux/init.h>
Eric Dumazet39c90ec2007-09-15 10:55:54 -070083#include <linux/workqueue.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070084#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070085#include <linux/inetdevice.h>
86#include <linux/igmp.h>
87#include <linux/pkt_sched.h>
88#include <linux/mroute.h>
89#include <linux/netfilter_ipv4.h>
90#include <linux/random.h>
91#include <linux/jhash.h>
92#include <linux/rcupdate.h>
93#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090094#include <linux/slab.h>
Stephen Rothwellb9eda062011-12-22 17:03:29 +110095#include <linux/prefetch.h>
Herbert Xu352e5122007-11-13 21:34:06 -080096#include <net/dst.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020097#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070098#include <net/protocol.h>
99#include <net/ip.h>
100#include <net/route.h>
101#include <net/inetpeer.h>
102#include <net/sock.h>
103#include <net/ip_fib.h>
104#include <net/arp.h>
105#include <net/tcp.h>
106#include <net/icmp.h>
107#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -0700108#include <net/netevent.h>
Thomas Graf63f34442007-03-22 11:55:17 -0700109#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700110#ifdef CONFIG_SYSCTL
111#include <linux/sysctl.h>
Shan Wei7426a562012-04-18 18:05:46 +0000112#include <linux/kmemleak.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700113#endif
David S. Miller6e5714e2011-08-03 20:50:44 -0700114#include <net/secure_seq.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115
/* Mask a flow's TOS down to the bits that matter for route lookup
 * (IPTOS_RT_MASK plus the legacy RTO_ONLINK flag). */
#define RT_FL_TOS(oldflp4) \
	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

/* Ceiling for any route MTU — just under the 64KB IPv4 datagram limit. */
#define IP_MAX_MTU	0xFFF0

#define RT_GC_TIMEOUT	(300*HZ)

/* Routing tunables; presumably exposed via sysctl elsewhere in this
 * file (not visible in this chunk) — hence no const. */
static int ip_rt_max_size;
static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
static int ip_rt_gc_interval __read_mostly	= 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
static int ip_rt_redirect_number __read_mostly	= 9;
static int ip_rt_redirect_load __read_mostly	= HZ / 50;
static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly	= HZ;
static int ip_rt_error_burst __read_mostly	= 5 * HZ;
static int ip_rt_gc_elasticity __read_mostly	= 8;
static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
/* 512 + 20 + 20: minimum payload plus IP and TCP header sizes. */
static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly	= 256;
/*
 *	Interface to generic destination cache.
 *	Forward declarations for the dst_ops callbacks wired up below.
 */

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ipv4_default_advmss(const struct dst_entry *dst);
static unsigned int	 ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void		 ipv4_link_failure(struct sk_buff *skb);
static void		 ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		 ip_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		ipv4_dst_destroy(struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151
/* dst_ops->ifdown hook: intentionally a no-op for IPv4 routes. */
static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			    int how)
{
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156
/* dst_ops->cow_metrics hook: IPv4 routes are never expected to need
 * copy-on-write metrics here, so reaching this is a bug — warn loudly. */
static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}
162
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);

/* Destination-cache operations for all IPv4 routes; each callback is
 * defined in this file (declarations above). */
static struct dst_ops ipv4_dst_ops = {
	.family =		AF_INET,
	.protocol =		cpu_to_be16(ETH_P_IP),
	.check =		ipv4_dst_check,
	.default_advmss =	ipv4_default_advmss,
	.mtu =			ipv4_mtu,
	.cow_metrics =		ipv4_cow_metrics,
	.destroy =		ipv4_dst_destroy,
	.ifdown =		ipv4_dst_ifdown,
	.negative_advice =	ipv4_negative_advice,
	.link_failure =		ipv4_link_failure,
	.update_pmtu =		ip_rt_update_pmtu,
	.redirect =		ip_do_redirect,
	.local_out =		__ip_local_out,
	.neigh_lookup =		ipv4_neigh_lookup,
};
183
184#define ECN_OR_COST(class) TC_PRIO_##class
185
Philippe De Muyter4839c522007-07-09 15:32:57 -0700186const __u8 ip_tos2prio[16] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187 TC_PRIO_BESTEFFORT,
Dan Siemon4a2b9c32011-03-15 13:56:07 +0000188 ECN_OR_COST(BESTEFFORT),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189 TC_PRIO_BESTEFFORT,
190 ECN_OR_COST(BESTEFFORT),
191 TC_PRIO_BULK,
192 ECN_OR_COST(BULK),
193 TC_PRIO_BULK,
194 ECN_OR_COST(BULK),
195 TC_PRIO_INTERACTIVE,
196 ECN_OR_COST(INTERACTIVE),
197 TC_PRIO_INTERACTIVE,
198 ECN_OR_COST(INTERACTIVE),
199 TC_PRIO_INTERACTIVE_BULK,
200 ECN_OR_COST(INTERACTIVE_BULK),
201 TC_PRIO_INTERACTIVE_BULK,
202 ECN_OR_COST(INTERACTIVE_BULK)
203};
Amir Vadaid4a96862012-04-04 21:33:28 +0000204EXPORT_SYMBOL(ip_tos2prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205
/* Per-cpu routing-cache statistics (reported via /proc/net/stat/rt_cache). */
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)

/* Current route-generation counter for @net; bumping it (see
 * rt_cache_invalidate()) implicitly expires every cached route. */
static inline int rt_genid(struct net *net)
{
	return atomic_read(&net->ipv4.rt_genid);
}
213
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214#ifdef CONFIG_PROC_FS
/* /proc/net/rt_cache: the per-entry cache listing is gone, so this
 * seq_file now emits only the header line, kept for ABI/tool
 * compatibility. */
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos)
		return NULL;
	return SEQ_START_TOKEN;
}

static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return NULL;	/* no entries after the header */
}

static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}

static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
			   "HHUptod\tSpecDst");
	return 0;
}

static const struct seq_operations rt_cache_seq_ops = {
	.start  = rt_cache_seq_start,
	.next   = rt_cache_seq_next,
	.stop   = rt_cache_seq_stop,
	.show   = rt_cache_seq_show,
};

static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cache_seq_ops);
}

static const struct file_operations rt_cache_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cache_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
261
262
/* /proc/net/stat/rt_cache: one line of counters per possible CPU.
 * The seq position is (cpu index + 1); position 0 is the header. */
static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	/* Skip holes in the possible-CPU map. */
	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;
}

static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;

}

static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}

static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	/* First field is the global dst count, not a per-cpu stat. */
	seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   st->in_hit,
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   st->out_hit,
		   st->out_slow_tot,
		   st->out_slow_mc,

		   st->gc_total,
		   st->gc_ignored,
		   st->gc_goal_miss,
		   st->gc_dst_overflow,
		   st->in_hlist_search,
		   st->out_hlist_search
		);
	return 0;
}

static const struct seq_operations rt_cpu_seq_ops = {
	.start  = rt_cpu_seq_start,
	.next   = rt_cpu_seq_next,
	.stop   = rt_cpu_seq_stop,
	.show   = rt_cpu_seq_show,
};


static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}

static const struct file_operations rt_cpu_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cpu_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
352
#ifdef CONFIG_IP_ROUTE_CLASSID
/* /proc/net/rt_acct: dump per-classid byte/packet accounting, summed
 * across all possible CPUs into a temporary 256-entry table. */
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
	struct ip_rt_acct *dst, *src;
	unsigned int i, j;

	dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
	if (!dst)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
		for (j = 0; j < 256; j++) {
			dst[j].o_bytes   += src[j].o_bytes;
			dst[j].o_packets += src[j].o_packets;
			dst[j].i_bytes   += src[j].i_bytes;
			dst[j].i_packets += src[j].i_packets;
		}
	}

	/* Binary dump of the aggregated table, matching the legacy format. */
	seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
	kfree(dst);
	return 0;
}

static int rt_acct_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt_acct_proc_show, NULL);
}

static const struct file_operations rt_acct_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= rt_acct_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800391
/* Create this netns's proc entries: /proc/net/rt_cache,
 * /proc/net/stat/rt_cache and (with route classid support)
 * /proc/net/rt_acct.  Unwinds already-created entries on failure. */
static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_net_fops_create(net, "rt_cache", S_IRUGO,
			&rt_cache_seq_fops);
	if (!pde)
		goto err1;

	pde = proc_create("rt_cache", S_IRUGO,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800422
/* Tear down the per-netns proc entries created by ip_rt_do_proc_init(). */
static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}
431
/* Register the proc setup/teardown for every network namespace. */
static struct pernet_operations ip_rt_proc_ops __net_initdata =	{
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};

static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}

#else
/* No procfs: provide a trivially-succeeding stub. */
static inline int ip_rt_proc_init(void)
{
	return 0;
}
#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900448
/* A cached route is stale once its generation id no longer matches the
 * current per-netns generation (see rt_genid()/rt_cache_invalidate()). */
static inline bool rt_is_expired(const struct rtable *rth)
{
	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
}
453
/*
 * Perturbation of rt_genid by a small quantity [1..256]
 * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
 * many times (2^24) without giving recent rt_genid.
 * Jenkins hash is strong enough that litle changes of rt_genid are OK.
 */
static void rt_cache_invalidate(struct net *net)
{
	unsigned char shuffle;

	get_random_bytes(&shuffle, sizeof(shuffle));
	/* +1 guarantees the generation always actually changes. */
	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
}
467
/*
 * delay < 0 : invalidate cache (fast : entries will be deleted later)
 * delay >= 0 : invalidate & flush cache (can be long)
 * NOTE(review): with the cache removed, @delay is now ignored and both
 * cases reduce to a generation bump.
 */
void rt_cache_flush(struct net *net, int delay)
{
	rt_cache_invalidate(net);
}
476
/* dst_ops->neigh_lookup for IPv4: resolve the ARP neighbour for this
 * route.  Key preference: route's gateway, then the skb's destination
 * address, then the caller-supplied @daddr. */
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	struct net_device *dev = dst->dev;
	const __be32 *pkey = daddr;
	const struct rtable *rt;
	struct neighbour *n;

	rt = (const struct rtable *) dst;
	if (rt->rt_gateway)
		pkey = (const __be32 *) &rt->rt_gateway;
	else if (skb)
		pkey = &ip_hdr(skb)->daddr;

	/* Fast hash lookup first; fall back to creating the entry. */
	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
	if (n)
		return n;
	return neigh_create(&arp_tbl, pkey, dev);
}
497
/*
 * Peer allocation may fail only in serious out-of-memory conditions.  However
 * we still can generate some output.
 * Random ID selection looks a bit dangerous because we have no chances to
 * select ID being unique in a reasonable period of time.
 * But broken packet identifier may be better than no packet at all.
 */
static void ip_select_fb_ident(struct iphdr *iph)
{
	static DEFINE_SPINLOCK(ip_fb_id_lock);
	static u32 ip_fallback_id;
	u32 salt;

	/* Lock serialises read-modify-write of the shared fallback id. */
	spin_lock_bh(&ip_fb_id_lock);
	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
	iph->id = htons(salt & 0xFFFF);
	ip_fallback_id = salt;
	spin_unlock_bh(&ip_fb_id_lock);
}
517
/* Fill in iph->id for an outgoing datagram.  Prefer the per-destination
 * counter kept in the inet_peer cache; fall back to the global salted
 * generator when no peer can be allocated. */
void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
{
	struct net *net = dev_net(dst->dev);
	struct inet_peer *peer;

	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
	if (peer) {
		iph->id = htons(inet_getid(peer, more));
		inet_putpeer(peer);
		return;
	}

	ip_select_fb_ident(iph);
}
EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533
/* Build an output flow key from an IP header.  When a socket is given,
 * its bound device, mark, TOS and protocol override the packet-derived
 * values. */
static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
			     const struct iphdr *iph,
			     int oif, u8 tos,
			     u8 prot, u32 mark, int flow_flags)
{
	if (sk) {
		const struct inet_sock *inet = inet_sk(sk);

		oif = sk->sk_bound_dev_if;
		mark = sk->sk_mark;
		tos = RT_CONN_FLAGS(sk);
		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
	}
	flowi4_init_output(fl4, oif, mark, tos,
			   RT_SCOPE_UNIVERSE, prot,
			   flow_flags,
			   iph->daddr, iph->saddr, 0, 0);
}
552
/* Derive a flow key from a received/queued skb (and optional socket). */
static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
			       const struct sock *sk)
{
	const struct iphdr *iph = ip_hdr(skb);
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	__build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
}
564
/* Derive a flow key purely from a connected socket's state; honours a
 * source-route (SRR) option's first-hop address if one is set. */
static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	/* RCU protects the options blob while we read opt.faddr. */
	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0);
	rcu_read_unlock();
}
582
/* Dispatch: use the skb when one is available, else fall back to the
 * socket alone. */
static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (skb)
		build_skb_flow_key(fl4, skb, sk);
	else
		build_sk_flow_key(fl4, sk);
}
591
/* Defer freeing a route until after a grace period, since readers may
 * still be traversing it under RCU. */
static inline void rt_free(struct rtable *rt)
{
	call_rcu(&rt->dst.rcu_head, dst_rcu_free);
}

/* Serialises all writers of the per-nexthop exception hash chains. */
static DEFINE_SPINLOCK(fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700598
/* Pick the least-recently-stamped exception in @hash for reuse and drop
 * its cached route.  Caller must hold fnhe_lock and guarantee the chain
 * is non-empty (only called when the chain exceeds FNHE_RECLAIM_DEPTH). */
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
	struct fib_nh_exception *fnhe, *oldest;
	struct rtable *orig;

	oldest = rcu_dereference(hash->chain);
	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
			oldest = fnhe;
	}
	/* Detach and RCU-free the route cached on the victim entry. */
	orig = rcu_dereference(oldest->fnhe_rth);
	if (orig) {
		RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
		rt_free(orig);
	}
	return oldest;
}
617
David S. Millerd3a25c92012-07-17 13:23:08 -0700618static inline u32 fnhe_hashfun(__be32 daddr)
619{
620 u32 hval;
621
622 hval = (__force u32) daddr;
623 hval ^= (hval >> 11) ^ (hval >> 22);
624
625 return hval & (FNHE_HASH_SIZE - 1);
626}
627
/* Record (or refresh) a per-destination routing exception on nexthop
 * @nh: a learned redirect gateway (@gw) and/or a path MTU (@pmtu with
 * its @expires deadline).  Allocates the hash table lazily; reclaims
 * the oldest entry once a chain grows past FNHE_RECLAIM_DEPTH. */
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
				  u32 pmtu, unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	int depth;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = nh->nh_exceptions;
	if (!hash) {
		hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		nh->nh_exceptions = hash;
	}

	hash += hval;

	/* Look for an existing exception for this destination. */
	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		/* Update only the fields the caller actually supplied. */
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			fnhe->fnhe_expires = expires;
		}
	} else {
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			/* Publish only after the node is initialised enough
			 * for RCU readers (rcu_assign_pointer barrier). */
			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_expires = expires;
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
	return;
}
686
/* Process an ICMP redirect for route @rt: validate the advertised
 * gateway, and if acceptable record it as a per-destination exception
 * on the FIB nexthop.  @kill_route marks the dst obsolete so callers
 * re-resolve.  Runs under RCU (see __in_dev_get_rcu below). */
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	/* Only the four network/host (TOS) redirect codes are honoured. */
	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	/* The redirect must come from the gateway we are currently using. */
	if (rt->rt_gateway != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	/* Reject self-redirects and non-unicast "gateways". */
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		/* New gateway must be directly reachable on this link... */
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		/* ...and, with secure redirects, an already-known default gw. */
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
	if (n) {
		if (!(n->nud_state & NUD_VALID)) {
			/* Kick neighbour resolution; act on a later redirect. */
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res) == 0) {
				struct fib_nh *nh = &FIB_RES_NH(res);

				update_or_create_fnhe(nh, fl4->daddr, new_gw,
						      0, 0);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     "  Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}
766
David S. Miller4895c772012-07-17 04:19:00 -0700767static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
768{
769 struct rtable *rt;
770 struct flowi4 fl4;
771
772 rt = (struct rtable *) dst;
773
774 ip_rt_build_flow_key(&fl4, sk, skb);
David S. Millerceb33202012-07-17 11:31:28 -0700775 __ip_do_redirect(rt, skb, &fl4, true);
David S. Miller4895c772012-07-17 04:19:00 -0700776}
777
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
779{
Eric Dumazetee6b9672008-03-05 18:30:47 -0800780 struct rtable *rt = (struct rtable *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781 struct dst_entry *ret = dst;
782
783 if (rt) {
Timo Teräsd11a4dc2010-03-18 23:20:20 +0000784 if (dst->obsolete > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785 ip_rt_put(rt);
786 ret = NULL;
David S. Miller59436342012-07-10 06:58:42 -0700787 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
788 rt->dst.expires) {
David S. Miller89aef892012-07-17 11:00:09 -0700789 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790 ret = NULL;
791 }
792 }
793 return ret;
794}
795
796/*
797 * Algorithm:
798 * 1. The first ip_rt_redirect_number redirects are sent
799 * with exponential backoff, then we stop sending them at all,
800 * assuming that the host ignores our redirects.
 * 2. If we did not see packets requiring redirects
 *		during ip_rt_redirect_silence, we assume that the host
 *		forgot the redirected route and start sending redirects again.
804 *
805 * This algorithm is much cheaper and more intelligent than dumb load limiting
806 * in icmp.c.
807 *
808 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
809 * and "frag. need" (breaks PMTU discovery) in icmp.c.
810 */
811
/* Send an ICMP redirect back to the sender of skb, rate limited per
 * source address via the inet_peer cache (algorithm described in the
 * comment block above).  Called from the forwarding path.
 */
void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;

	/* Only hold the RCU lock long enough to sample the device's
	 * redirect/log settings; the rest of the function sleeps in no
	 * way but does not need in_dev.
	 */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
	if (!peer) {
		/* No peer entry available: send unthrottled. */
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
		peer->rate_tokens = 0;

	/* Too many ignored redirects; do not send anything
	 * set dst.rate_last to the last seen redirected packet.
	 */
	if (peer->rate_tokens >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.  Back-off doubles with each redirect already sent
	 * (ip_rt_redirect_load << rate_tokens).
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		/* Log exactly once, when we cross the give-up threshold. */
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &rt->rt_gateway);
#endif
	}
out_put_peer:
	/* Balance the reference taken by inet_getpeer_v4(). */
	inet_putpeer(peer);
}
871
872static int ip_error(struct sk_buff *skb)
873{
David S. Miller251da412012-06-26 16:27:09 -0700874 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
Eric Dumazet511c3f92009-06-02 05:14:27 +0000875 struct rtable *rt = skb_rtable(skb);
David S. Miller92d86822011-02-04 15:55:25 -0800876 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700877 unsigned long now;
David S. Miller251da412012-06-26 16:27:09 -0700878 struct net *net;
David S. Miller92d86822011-02-04 15:55:25 -0800879 bool send;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880 int code;
881
David S. Miller251da412012-06-26 16:27:09 -0700882 net = dev_net(rt->dst.dev);
883 if (!IN_DEV_FORWARD(in_dev)) {
884 switch (rt->dst.error) {
885 case EHOSTUNREACH:
886 IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
887 break;
888
889 case ENETUNREACH:
890 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
891 break;
892 }
893 goto out;
894 }
895
Changli Gaod8d1f302010-06-10 23:31:35 -0700896 switch (rt->dst.error) {
Joe Perches4500ebf2011-07-01 09:43:07 +0000897 case EINVAL:
898 default:
899 goto out;
900 case EHOSTUNREACH:
901 code = ICMP_HOST_UNREACH;
902 break;
903 case ENETUNREACH:
904 code = ICMP_NET_UNREACH;
David S. Miller251da412012-06-26 16:27:09 -0700905 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
Joe Perches4500ebf2011-07-01 09:43:07 +0000906 break;
907 case EACCES:
908 code = ICMP_PKT_FILTERED;
909 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910 }
911
David S. Miller1d861aa2012-07-10 03:58:16 -0700912 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
David S. Miller92d86822011-02-04 15:55:25 -0800913
914 send = true;
915 if (peer) {
916 now = jiffies;
917 peer->rate_tokens += now - peer->rate_last;
918 if (peer->rate_tokens > ip_rt_error_burst)
919 peer->rate_tokens = ip_rt_error_burst;
920 peer->rate_last = now;
921 if (peer->rate_tokens >= ip_rt_error_cost)
922 peer->rate_tokens -= ip_rt_error_cost;
923 else
924 send = false;
David S. Miller1d861aa2012-07-10 03:58:16 -0700925 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926 }
David S. Miller92d86822011-02-04 15:55:25 -0800927 if (send)
928 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929
930out: kfree_skb(skb);
931 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900932}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933
David S. Millerceb33202012-07-17 11:31:28 -0700934static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935{
David S. Miller4895c772012-07-17 04:19:00 -0700936 struct fib_result res;
David S. Miller2c8cec52011-02-09 20:42:07 -0800937
David S. Miller59436342012-07-10 06:58:42 -0700938 if (mtu < ip_rt_min_pmtu)
939 mtu = ip_rt_min_pmtu;
Eric Dumazetfe6fe792011-06-08 06:07:07 +0000940
David S. Miller4895c772012-07-17 04:19:00 -0700941 if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) {
942 struct fib_nh *nh = &FIB_RES_NH(res);
David S. Miller4895c772012-07-17 04:19:00 -0700943
Julian Anastasovaee06da2012-07-18 10:15:35 +0000944 update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
945 jiffies + ip_rt_mtu_expires);
David S. Miller4895c772012-07-17 04:19:00 -0700946 }
David S. Millerceb33202012-07-17 11:31:28 -0700947 return mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948}
949
David S. Miller4895c772012-07-17 04:19:00 -0700950static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
951 struct sk_buff *skb, u32 mtu)
952{
953 struct rtable *rt = (struct rtable *) dst;
954 struct flowi4 fl4;
955
956 ip_rt_build_flow_key(&fl4, sk, skb);
David S. Millerceb33202012-07-17 11:31:28 -0700957 mtu = __ip_rt_update_pmtu(rt, &fl4, mtu);
958
959 if (!rt->rt_pmtu) {
960 dst->obsolete = DST_OBSOLETE_KILL;
961 } else {
962 rt->rt_pmtu = mtu;
963 dst_set_expires(&rt->dst, ip_rt_mtu_expires);
964 }
David S. Miller4895c772012-07-17 04:19:00 -0700965}
966
David S. Miller36393392012-06-14 22:21:46 -0700967void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
968 int oif, u32 mark, u8 protocol, int flow_flags)
969{
David S. Miller4895c772012-07-17 04:19:00 -0700970 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -0700971 struct flowi4 fl4;
972 struct rtable *rt;
973
David S. Miller4895c772012-07-17 04:19:00 -0700974 __build_flow_key(&fl4, NULL, iph, oif,
975 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Miller36393392012-06-14 22:21:46 -0700976 rt = __ip_route_output_key(net, &fl4);
977 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -0700978 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -0700979 ip_rt_put(rt);
980 }
981}
982EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
983
984void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
985{
David S. Miller4895c772012-07-17 04:19:00 -0700986 const struct iphdr *iph = (const struct iphdr *) skb->data;
987 struct flowi4 fl4;
988 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -0700989
David S. Miller4895c772012-07-17 04:19:00 -0700990 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
991 rt = __ip_route_output_key(sock_net(sk), &fl4);
992 if (!IS_ERR(rt)) {
993 __ip_rt_update_pmtu(rt, &fl4, mtu);
994 ip_rt_put(rt);
995 }
David S. Miller36393392012-06-14 22:21:46 -0700996}
997EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -0800998
David S. Millerb42597e2012-07-11 21:25:45 -0700999void ipv4_redirect(struct sk_buff *skb, struct net *net,
1000 int oif, u32 mark, u8 protocol, int flow_flags)
1001{
David S. Miller4895c772012-07-17 04:19:00 -07001002 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -07001003 struct flowi4 fl4;
1004 struct rtable *rt;
1005
David S. Miller4895c772012-07-17 04:19:00 -07001006 __build_flow_key(&fl4, NULL, iph, oif,
1007 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Millerb42597e2012-07-11 21:25:45 -07001008 rt = __ip_route_output_key(net, &fl4);
1009 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001010 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -07001011 ip_rt_put(rt);
1012 }
1013}
1014EXPORT_SYMBOL_GPL(ipv4_redirect);
1015
1016void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1017{
David S. Miller4895c772012-07-17 04:19:00 -07001018 const struct iphdr *iph = (const struct iphdr *) skb->data;
1019 struct flowi4 fl4;
1020 struct rtable *rt;
David S. Millerb42597e2012-07-11 21:25:45 -07001021
David S. Miller4895c772012-07-17 04:19:00 -07001022 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
1023 rt = __ip_route_output_key(sock_net(sk), &fl4);
1024 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001025 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001026 ip_rt_put(rt);
1027 }
David S. Millerb42597e2012-07-11 21:25:45 -07001028}
1029EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1030
David S. Millerefbc3682011-12-01 13:38:59 -05001031static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1032{
1033 struct rtable *rt = (struct rtable *) dst;
1034
David S. Millerceb33202012-07-17 11:31:28 -07001035 /* All IPV4 dsts are created with ->obsolete set to the value
1036 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1037 * into this function always.
1038 *
1039 * When a PMTU/redirect information update invalidates a
1040 * route, this is indicated by setting obsolete to
1041 * DST_OBSOLETE_KILL.
1042 */
1043 if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt))
David S. Millerefbc3682011-12-01 13:38:59 -05001044 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001045 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046}
1047
Linus Torvalds1da177e2005-04-16 15:20:36 -07001048static void ipv4_link_failure(struct sk_buff *skb)
1049{
1050 struct rtable *rt;
1051
1052 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1053
Eric Dumazet511c3f92009-06-02 05:14:27 +00001054 rt = skb_rtable(skb);
David S. Miller59436342012-07-10 06:58:42 -07001055 if (rt)
1056 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057}
1058
1059static int ip_rt_bug(struct sk_buff *skb)
1060{
Joe Perches91df42b2012-05-15 14:11:54 +00001061 pr_debug("%s: %pI4 -> %pI4, %s\n",
1062 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1063 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001065 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001066 return 0;
1067}
1068
/*
   We do not cache the source address of the outgoing interface,
   because it is used only by IP RR, TS and SRR options,
   so it is out of the fast path.

   BTW remember: "addr" is allowed to be not aligned
   in IP options!
 */
1077
/* Fetch the source address this host would use for the flow carried by
 * skb over route rt, writing it (possibly unaligned, hence memcpy) into
 * addr.  Used by the IP RR/TS/SRR option handlers only.
 *
 * For output routes the answer is simply the packet's own source
 * address.  For input routes we rebuild the flow and ask the FIB for
 * the preferred source; if the lookup fails we fall back to picking an
 * address scoped to the nexthop.
 */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct flowi4 fl4;
		struct iphdr *iph;

		iph = ip_hdr(skb);

		/* Reconstruct the flow key from the packet headers. */
		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_oif = rt->dst.dev->ifindex;
		fl4.flowi4_iif = skb->dev->ifindex;
		fl4.flowi4_mark = skb->mark;

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
		else
			/* No FIB match: choose a universe-scoped address
			 * on the output device toward the nexthop.
			 */
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	/* addr may be unaligned inside IP options — copy byte-wise. */
	memcpy(addr, &src, 4);
}
1110
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Merge a routing classid tag into the dst: the low and high 16-bit
 * halves are filled independently, and only if not already set.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	if ((rt->dst.tclassid & 0xFFFF) == 0)
		rt->dst.tclassid |= tag & 0xFFFF;
	if ((rt->dst.tclassid & 0xFFFF0000) == 0)
		rt->dst.tclassid |= tag & 0xFFFF0000;
}
#endif
1120
David S. Miller0dbaee32010-12-13 12:52:14 -08001121static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1122{
1123 unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
1124
1125 if (advmss == 0) {
1126 advmss = max_t(unsigned int, dst->dev->mtu - 40,
1127 ip_rt_min_advmss);
1128 if (advmss > 65535 - 40)
1129 advmss = 65535 - 40;
1130 }
1131 return advmss;
1132}
1133
Steffen Klassertebb762f2011-11-23 02:12:51 +00001134static unsigned int ipv4_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08001135{
Steffen Klassert261663b2011-11-23 02:14:50 +00001136 const struct rtable *rt = (const struct rtable *) dst;
David S. Miller59436342012-07-10 06:58:42 -07001137 unsigned int mtu = rt->rt_pmtu;
1138
1139 if (mtu && time_after_eq(jiffies, rt->dst.expires))
1140 mtu = 0;
1141
1142 if (!mtu)
1143 mtu = dst_metric_raw(dst, RTAX_MTU);
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001144
Steffen Klassert261663b2011-11-23 02:14:50 +00001145 if (mtu && rt_is_output_route(rt))
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001146 return mtu;
1147
1148 mtu = dst->dev->mtu;
David S. Millerd33e4552010-12-14 13:01:14 -08001149
1150 if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
David S. Millerf8126f12012-07-13 05:03:45 -07001151 if (rt->rt_gateway && mtu > 576)
David S. Millerd33e4552010-12-14 13:01:14 -08001152 mtu = 576;
1153 }
1154
1155 if (mtu > IP_MAX_MTU)
1156 mtu = IP_MAX_MTU;
1157
1158 return mtu;
1159}
1160
David S. Millerf2bb4be2012-07-17 12:20:47 -07001161static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
David S. Miller4895c772012-07-17 04:19:00 -07001162{
1163 struct fnhe_hash_bucket *hash = nh->nh_exceptions;
1164 struct fib_nh_exception *fnhe;
1165 u32 hval;
1166
David S. Millerf2bb4be2012-07-17 12:20:47 -07001167 if (!hash)
1168 return NULL;
1169
David S. Millerd3a25c92012-07-17 13:23:08 -07001170 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -07001171
1172 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1173 fnhe = rcu_dereference(fnhe->fnhe_next)) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001174 if (fnhe->fnhe_daddr == daddr)
1175 return fnhe;
1176 }
1177 return NULL;
1178}
David S. Miller4895c772012-07-17 04:19:00 -07001179
/* Apply a nexthop exception's state (learned PMTU and/or redirect
 * gateway) to rt and install rt as the exception's cached route.
 * Returns true if rt was cached in the exception, false otherwise —
 * in which case the caller must treat rt as uncached.
 *
 * fnhe_lock serialises against update_or_create_fnhe() and concurrent
 * binders; the daddr re-check below guards against the exception being
 * recycled for a different destination before we got the lock.
 */
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	if (daddr == fnhe->fnhe_daddr) {
		struct rtable *orig;

		/* Copy a still-valid learned PMTU onto the route. */
		if (fnhe->fnhe_pmtu) {
			unsigned long expires = fnhe->fnhe_expires;
			unsigned long diff = expires - jiffies;

			if (time_before(jiffies, expires)) {
				rt->rt_pmtu = fnhe->fnhe_pmtu;
				dst_set_expires(&rt->dst, diff);
			}
		}
		/* Copy a redirect-learned gateway onto the route. */
		if (fnhe->fnhe_gw) {
			rt->rt_flags |= RTCF_REDIRECTED;
			rt->rt_gateway = fnhe->fnhe_gw;
		}

		/* Publish rt as the exception's cached route, freeing
		 * whatever was there before.
		 */
		orig = rcu_dereference(fnhe->fnhe_rth);
		rcu_assign_pointer(fnhe->fnhe_rth, rt);
		if (orig)
			rt_free(orig);

		fnhe->fnhe_stamp = jiffies;
		ret = true;
	} else {
		/* Routes we intend to cache in nexthop exception have
		 * the DST_NOCACHE bit clear.  However, if we are
		 * unsuccessful at storing this route into the cache
		 * we really need to set it.
		 */
		rt->dst.flags |= DST_NOCACHE;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}
1223
/* Try to store rt in the FIB nexthop's route cache: the shared input
 * slot for input routes, or this CPU's per-cpu output slot otherwise.
 * Returns true on success; on failure marks rt DST_NOCACHE and returns
 * false so the caller puts it on the uncached list instead.
 */
static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nh->nh_rth_input;
	} else {
		/* Per-cpu output cache may not have been allocated. */
		if (!nh->nh_pcpu_rth_output)
			goto nocache;
		p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
	}
	orig = *p;

	/* Lock-free publish: only free the old entry if we are the one
	 * who actually swapped it out.
	 */
	prev = cmpxchg(p, orig, rt);
	if (prev == orig) {
		if (orig)
			rt_free(orig);
	} else {
		/* Routes we intend to cache in the FIB nexthop have
		 * the DST_NOCACHE bit clear.  However, if we are
		 * unsuccessful at storing this route into the cache
		 * we really need to set it.
		 */
nocache:
		rt->dst.flags |= DST_NOCACHE;
		ret = false;
	}

	return ret;
}
1255
/* Global list of routes that could not be cached in a FIB nexthop;
 * rt_flush_dev() walks it on device teardown.  Protected by
 * rt_uncached_lock (BH-safe, as routes are created in softirq context).
 */
static DEFINE_SPINLOCK(rt_uncached_lock);
static LIST_HEAD(rt_uncached_list);

/* Track an uncached route so it can be repointed when its device goes. */
static void rt_add_uncached_list(struct rtable *rt)
{
	spin_lock_bh(&rt_uncached_lock);
	list_add_tail(&rt->rt_uncached, &rt_uncached_list);
	spin_unlock_bh(&rt_uncached_lock);
}
1265
1266static void ipv4_dst_destroy(struct dst_entry *dst)
1267{
1268 struct rtable *rt = (struct rtable *) dst;
1269
1270 if (dst->flags & DST_NOCACHE) {
1271 spin_lock_bh(&rt_uncached_lock);
1272 list_del(&rt->rt_uncached);
1273 spin_unlock_bh(&rt_uncached_lock);
1274 }
1275}
1276
1277void rt_flush_dev(struct net_device *dev)
1278{
1279 if (!list_empty(&rt_uncached_list)) {
1280 struct net *net = dev_net(dev);
1281 struct rtable *rt;
1282
1283 spin_lock_bh(&rt_uncached_lock);
1284 list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
1285 if (rt->dst.dev != dev)
1286 continue;
1287 rt->dst.dev = net->loopback_dev;
1288 dev_hold(rt->dst.dev);
1289 dev_put(dev);
1290 }
1291 spin_unlock_bh(&rt_uncached_lock);
David S. Miller4895c772012-07-17 04:19:00 -07001292 }
1293}
1294
Eric Dumazet4331deb2012-07-25 05:11:23 +00001295static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba2012-07-17 12:58:50 -07001296{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001297 return rt &&
1298 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1299 !rt_is_expired(rt);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001300}
1301
/* Finish initialising rt from the FIB lookup result: copy the gateway
 * and metrics from the nexthop, attach the route to either the matching
 * nexthop exception (fnhe) or the nexthop's route cache, and apply
 * classid tags.  Routes that end up cached nowhere go on the global
 * uncached list so device teardown can still find them.
 */
static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag)
{
	bool cached = false;

	if (fi) {
		struct fib_nh *nh = &FIB_RES_NH(*res);

		/* Only link-scope nexthops have a real gateway address. */
		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
			rt->rt_gateway = nh->nh_gw;
		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
		rt->dst.tclassid = nh->nh_tclassid;
#endif
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr);
		else if (!(rt->dst.flags & DST_NOCACHE))
			cached = rt_cache_route(nh, rt);
	}
	/* Not cached anywhere (no fib_info, or caching failed): track it
	 * on the uncached list for rt_flush_dev().
	 */
	if (unlikely(!cached))
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}
1333
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001334static struct rtable *rt_dst_alloc(struct net_device *dev,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001335 bool nopolicy, bool noxfrm, bool will_cache)
David S. Miller0c4dcd52011-02-17 15:42:37 -08001336{
David S. Millerf5b0a872012-07-19 12:31:33 -07001337 return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
David S. Millerc6cffba2012-07-26 11:14:38 +00001338 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001339 (nopolicy ? DST_NOPOLICY : 0) |
1340 (noxfrm ? DST_NOXFRM : 0));
David S. Miller0c4dcd52011-02-17 15:42:37 -08001341}
1342
/* Input-route handler for multicast destinations; called in an
 * rcu_read_lock() section from the input path.
 *
 * Validates the source address, builds a multicast rtable that delivers
 * locally (and/or to the multicast forwarding engine when configured),
 * and attaches it to skb.  Returns 0 on success or a negative errno.
 *
 * @our: nonzero if this host is a member of the destination group, in
 *       which case the route also carries RTCF_LOCAL.
 */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct rtable *rth;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	u32 itag = 0;
	int err;

	/* Primary sanity checks. */

	if (in_dev == NULL)
		return -EINVAL;

	/* Multicast/broadcast source addresses are never valid. */
	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	/* Loopback sources are only allowed with route_localnet. */
	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(saddr))
			goto e_inval;

	if (ipv4_is_zeronet(saddr)) {
		/* 0.0.0.0 sources are legal only for link-local groups
		 * (e.g. DHCP/IGMP during address acquisition).
		 */
		if (!ipv4_is_local_multicast(daddr))
			goto e_inval;
	} else {
		/* Reverse-path check of the source address. */
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto e_err;
	}
	/* Not cached (will_cache == false): delivery route only. */
	rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		goto e_nobufs;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	/* Multicast input routes must never be used for output. */
	rth->dst.output = ip_rt_bug;

	rth->rt_genid = rt_genid(dev_net(dev));
	rth->rt_flags = RTCF_MULTICAST;
	rth->rt_type = RTN_MULTICAST;
	rth->rt_is_input= 1;
	rth->rt_iif = 0;
	rth->rt_pmtu = 0;
	rth->rt_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	if (our) {
		rth->dst.input= ip_local_deliver;
		rth->rt_flags |= RTCF_LOCAL;
	}

#ifdef CONFIG_IP_MROUTE
	/* Non-link-local groups go through the multicast router. */
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;

e_nobufs:
	return -ENOBUFS;
e_inval:
	return -EINVAL;
e_err:
	return err;
}
1413
1414
/* Account for, and (when CONFIG_IP_ROUTE_VERBOSE plus the per-device
 * log_martians setting allow) log, a packet whose source address failed
 * validation.  Per RFC 1812, the MAC header is the only useful hint
 * about where a martian actually came from, so it is hex-dumped too.
 */
static void ip_handle_martian_source(struct net_device *dev,
				     struct in_device *in_dev,
				     struct sk_buff *skb,
				     __be32 daddr,
				     __be32 saddr)
{
	RT_CACHE_STAT_INC(in_martian_src);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
		/*
		 *	RFC1812 recommendation, if source is martian,
		 *	the only hint is MAC header.
		 */
		pr_warn("martian source %pI4 from %pI4, on dev %s\n",
			&daddr, &saddr, dev->name);
		if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
			print_hex_dump(KERN_WARNING, "ll header: ",
				       DUMP_PREFIX_OFFSET, 16, 1,
				       skb_mac_header(skb),
				       dev->hard_header_len, true);
		}
	}
#endif
}
1439
/* called in rcu_read_lock() section */
/*
 * __mkroute_input - build (or reuse) the dst entry for a forwarded packet.
 * @skb:	packet being routed; its dst is set on success.
 * @res:	FIB lookup result selecting output device and nexthop.
 * @in_dev:	IPv4 state of the ingress device.
 * @daddr:	destination address from the IP header.
 * @saddr:	source address from the IP header.
 * @tos:	type-of-service bits used for source validation.
 *
 * Returns 0 on success (skb's dst set) or a negative errno.  When the
 * nexthop already holds a valid cached input route and no tclassid tag
 * (itag) is required, the cached dst is attached without allocating.
 */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	unsigned int flags = 0;
	bool do_cache;
	u32 itag;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
	if (out_dev == NULL) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}


	/* Reverse-path check; err > 0 means saddr validated via a
	 * different interface than it arrived on (see RTCF_DOREDIRECT
	 * decision below).
	 */
	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	if (out_dev == in_dev && err &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
		flags |= RTCF_DOREDIRECT;

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * Proxy arp feature have been extended to allow, ARP
		 * replies back to the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	/* Fast path: reuse the nexthop's cached input route when valid.
	 * Caching is skipped when itag is set, since the tclassid would
	 * differ per source.
	 */
	do_cache = false;
	if (res->fi) {
		if (!itag) {
			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(out_dev->dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
	rth->rt_flags = flags;
	rth->rt_type = res->type;
	rth->rt_is_input = 1;
	rth->rt_iif 	= 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway	= 0;
	INIT_LIST_HEAD(&rth->rt_uncached);

	rth->dst.input = ip_forward;
	rth->dst.output = ip_output;

	rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001529
Stephen Hemminger5969f712008-04-10 01:52:09 -07001530static int ip_mkroute_input(struct sk_buff *skb,
1531 struct fib_result *res,
David S. Miller68a5e3d2011-03-11 20:07:33 -05001532 const struct flowi4 *fl4,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001533 struct in_device *in_dev,
1534 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001535{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001536#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Millerff3fccb2011-03-10 16:23:24 -08001537 if (res->fi && res->fi->fib_nhs > 1)
David S. Miller1b7fe5932011-03-10 17:01:16 -08001538 fib_select_multipath(res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001539#endif
1540
1541 /* create a routing cache entry */
David S. Millerc6cffba2012-07-26 11:14:38 +00001542 return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001543}
1544
/*
 *	NOTE. We drop all the packets that has local source
 *	addresses, because every properly looped back packet
 *	must have correct destination already attached by output routine.
 *
 * Such approach solves two big problems:
 * 1. Not simplex devices are handled properly.
 * 2. IP spoofing attempts are filtered with 100% of guarantee.
 * called with rcu_read_lock()
 */

/*
 * ip_route_input_slow - slow-path input route resolution.
 * @skb:	packet being routed.
 * @daddr:	destination address from the IP header.
 * @saddr:	source address from the IP header.
 * @tos:	TOS byte used as part of the lookup key.
 * @dev:	ingress device.
 *
 * Classifies the packet (martian / broadcast / local / forward), performs
 * the FIB lookup, and attaches the resulting dst to @skb.  Returns 0 on
 * success or a negative errno.
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct flowi4	fl4;
	unsigned int	flags = 0;
	u32		itag = 0;
	struct rtable	*rth;
	int		err = -EINVAL;
	struct net    *net = dev_net(dev);
	bool do_cache;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which can be not detected
	   by fib_lookup.
	 */

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res.fi = NULL;
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I even do not know to fix it or not. Waiting for complains :-)
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

	/* Loopback addresses are martian unless the device explicitly
	 * permits routing of 127/8 (route_localnet).
	 */
	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) {
		if (ipv4_is_loopback(daddr))
			goto martian_destination;

		if (ipv4_is_loopback(saddr))
			goto martian_source;
	}

	/*
	 *	Now we are ready to route packet.
	 */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	err = fib_lookup(net, &fl4, &res);
	if (err != 0)
		goto no_route;

	RT_CACHE_STAT_INC(in_slow_tot);

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		err = fib_validate_source(skb, saddr, daddr, tos,
					  net->loopback_dev->ifindex,
					  dev, in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev))
		goto no_route;
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	/* Zero source is legal for limited broadcast (e.g. DHCP); only
	 * validate non-zero sources.
	 */
	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	/* Try to reuse the nexthop's cached input route; caching is
	 * skipped when itag is set (tclassid differs per source).
	 */
	do_cache = false;
	if (res.fi) {
		if (!itag) {
			rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				err = 0;
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(net->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
	if (!rth)
		goto e_nobufs;

	rth->dst.input= ip_local_deliver;
	rth->dst.output= ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif

	rth->rt_genid = rt_genid(net);
	rth->rt_flags 	= flags|RTCF_LOCAL;
	rth->rt_type	= res.type;
	rth->rt_is_input = 1;
	rth->rt_iif	= 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway	= 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	if (res.type == RTN_UNREACHABLE) {
		rth->dst.input= ip_error;
		rth->dst.error= -err;
		rth->rt_flags 	&= ~RTCF_LOCAL;
	}
	if (do_cache)
		rt_cache_route(&FIB_RES_NH(res), rth);
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	res.type = RTN_UNREACHABLE;
	if (err == -ESRCH)
		err = -ENETUNREACH;
	/* Build an ip_error dst via local_input so ICMP errors can be
	 * generated for the sender.
	 */
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	err = -EINVAL;
martian_source_keep_err:
	/* Preserve err from fib_validate_source() on this path. */
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}
1728
David S. Millerc6cffba2012-07-26 11:14:38 +00001729int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1730 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731{
Eric Dumazet96d36222010-06-02 19:21:31 +00001732 int res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733
Eric Dumazet96d36222010-06-02 19:21:31 +00001734 rcu_read_lock();
1735
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736 /* Multicast recognition logic is moved from route cache to here.
1737 The problem was that too many Ethernet cards have broken/missing
1738 hardware multicast filters :-( As result the host on multicasting
1739 network acquires a lot of useless route cache entries, sort of
1740 SDR messages from all the world. Now we try to get rid of them.
1741 Really, provided software IP multicast filter is organized
1742 reasonably (at least, hashed), it does not result in a slowdown
1743 comparing with route cache reject entries.
1744 Note, that multicast routers are not affected, because
1745 route cache entry is created eventually.
1746 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001747 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001748 struct in_device *in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749
Eric Dumazet96d36222010-06-02 19:21:31 +00001750 if (in_dev) {
David S. Millerdbdd9a52011-03-10 16:34:38 -08001751 int our = ip_check_mc_rcu(in_dev, daddr, saddr,
1752 ip_hdr(skb)->protocol);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753 if (our
1754#ifdef CONFIG_IP_MROUTE
Joe Perches9d4fb272009-11-23 10:41:23 -08001755 ||
1756 (!ipv4_is_local_multicast(daddr) &&
1757 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001758#endif
Joe Perches9d4fb272009-11-23 10:41:23 -08001759 ) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001760 int res = ip_route_input_mc(skb, daddr, saddr,
1761 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762 rcu_read_unlock();
Eric Dumazet96d36222010-06-02 19:21:31 +00001763 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764 }
1765 }
1766 rcu_read_unlock();
1767 return -EINVAL;
1768 }
David S. Millerc10237e2012-06-27 17:05:06 -07001769 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
Eric Dumazet96d36222010-06-02 19:21:31 +00001770 rcu_read_unlock();
1771 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772}
David S. Millerc6cffba2012-07-26 11:14:38 +00001773EXPORT_SYMBOL(ip_route_input_noref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774
/* called with rcu_read_lock() */
/*
 * __mkroute_output - build (or reuse) the dst entry for an output route.
 * @res:	FIB lookup result (may have res->fi == NULL for synthetic
 *		routes created by the caller).
 * @fl4:	resolved flow key; daddr/saddr are final at this point.
 * @orig_oif:	the oif originally requested by the caller, recorded in
 *		rt_iif before the resolver rewrote fl4->flowi4_oif.
 * @dev_out:	chosen output device.
 * @flags:	RTCF_* flags accumulated by the caller.
 *
 * Returns a referenced rtable on success or an ERR_PTR() errno.  When a
 * valid route is already cached on the nexthop (per-exception or per-cpu),
 * it is returned with an extra reference instead of allocating.
 */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	struct fib_nh_exception *fnhe;
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	/* Loopback source on a non-loopback device is only allowed when
	 * route_localnet is enabled.
	 */
	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
			return ERR_PTR(-EINVAL);

	/* Reclassify by destination address class. */
	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		/* If multicast route do not exist use
		 * default one, but do not gateway in this case.
		 * Yes, it is hack.
		 */
		if (fi && res->prefixlen < 4)
			fi = NULL;
	}

	/* Try to reuse a cached output route: first a per-destination
	 * exception (PMTU/redirect), otherwise the nexthop's per-cpu
	 * cache.
	 */
	fnhe = NULL;
	if (fi) {
		struct rtable __rcu **prth;

		fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
		if (fnhe)
			prth = &fnhe->fnhe_rth;
		else
			prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
		rth = rcu_dereference(*prth);
		if (rt_cache_valid(rth)) {
			dst_hold(&rth->dst);
			return rth;
		}
	}
	rth = rt_dst_alloc(dev_out,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(in_dev, NOXFRM),
			   fi);
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->dst.output = ip_output;

	rth->rt_genid = rt_genid(dev_net(dev_out));
	rth->rt_flags	= flags;
	rth->rt_type	= type;
	rth->rt_is_input = 0;
	rth->rt_iif	= orig_oif ? : 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & RTCF_LOCAL)
		rth->dst.input = ip_local_deliver;
	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);

	return rth;
}
1879
/*
 * Major route resolver routine.
 */

/*
 * __ip_route_output_key - resolve an output route for flow key @fl4.
 * @net:	namespace to route in.
 * @fl4:	flow key; modified in place (saddr/daddr/oif/iif/tos/scope
 *		may be filled in or rewritten during resolution).
 *
 * Returns a referenced rtable on success or an ERR_PTR() errno.  Runs
 * under rcu_read_lock() for the duration of device and FIB lookups.
 */
struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
{
	struct net_device *dev_out = NULL;
	__u8 tos = RT_FL_TOS(fl4);
	unsigned int flags = 0;
	struct fib_result res;
	struct rtable *rth;
	int orig_oif;

	res.tclassid	= 0;
	res.fi		= NULL;
	res.table	= NULL;

	orig_oif = fl4->flowi4_oif;

	fl4->flowi4_iif = net->loopback_dev->ifindex;
	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
			 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);

	rcu_read_lock();
	if (fl4->saddr) {
		rth = ERR_PTR(-EINVAL);
		/* A requested source address must be unicast. */
		if (ipv4_is_multicast(fl4->saddr) ||
		    ipv4_is_lbcast(fl4->saddr) ||
		    ipv4_is_zeronet(fl4->saddr))
			goto out;

		/* I removed check for oif == dev_out->oif here.
		   It was wrong for two reasons:
		   1. ip_dev_find(net, saddr) can return wrong iface, if saddr
		      is assigned to multiple interfaces.
		   2. Moreover, we are allowed to send packets with saddr
		      of another iface. --ANK
		 */

		if (fl4->flowi4_oif == 0 &&
		    (ipv4_is_multicast(fl4->daddr) ||
		     ipv4_is_lbcast(fl4->daddr))) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			dev_out = __ip_dev_find(net, fl4->saddr, false);
			if (dev_out == NULL)
				goto out;

			/* Special hack: user can direct multicasts
			   and limited broadcast via necessary interface
			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
			   This hack is not just for fun, it allows
			   vic,vat and friends to work.
			   They bind socket to loopback, set ttl to zero
			   and expect that it will work.
			   From the viewpoint of routing cache they are broken,
			   because we are not allowed to build multicast path
			   with loopback source addr (look, routing cache
			   cannot know, that ttl is zero, so that packet
			   will not leave this host and route is valid).
			   Luckily, this hack is good workaround.
			 */

			fl4->flowi4_oif = dev_out->ifindex;
			goto make_route;
		}

		if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			if (!__ip_dev_find(net, fl4->saddr, false))
				goto out;
		}
	}


	if (fl4->flowi4_oif) {
		dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
		rth = ERR_PTR(-ENODEV);
		if (dev_out == NULL)
			goto out;

		/* RACE: Check return value of inet_select_addr instead. */
		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
			rth = ERR_PTR(-ENETUNREACH);
			goto out;
		}
		if (ipv4_is_local_multicast(fl4->daddr) ||
		    ipv4_is_lbcast(fl4->daddr)) {
			if (!fl4->saddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			goto make_route;
		}
		if (fl4->saddr) {
			if (ipv4_is_multicast(fl4->daddr))
				fl4->saddr = inet_select_addr(dev_out, 0,
							      fl4->flowi4_scope);
			else if (!fl4->daddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_HOST);
		}
	}

	/* No destination at all: route to loopback. */
	if (!fl4->daddr) {
		fl4->daddr = fl4->saddr;
		if (!fl4->daddr)
			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
		dev_out = net->loopback_dev;
		fl4->flowi4_oif = net->loopback_dev->ifindex;
		res.type = RTN_LOCAL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	if (fib_lookup(net, fl4, &res)) {
		res.fi = NULL;
		res.table = NULL;
		if (fl4->flowi4_oif) {
			/* Apparently, routing tables are wrong. Assume,
			   that the destination is on link.

			   WHY? DW.
			   Because we are allowed to send to iface
			   even if it has NO routes and NO assigned
			   addresses. When oif is specified, routing
			   tables are looked up with only one purpose:
			   to catch if destination is gatewayed, rather than
			   direct. Moreover, if MSG_DONTROUTE is set,
			   we send packet, ignoring both routing tables
			   and ifaddr state. --ANK


			   We could make it even if oif is unknown,
			   likely IPv6, but we do not.
			 */

			if (fl4->saddr == 0)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			res.type = RTN_UNICAST;
			goto make_route;
		}
		rth = ERR_PTR(-ENETUNREACH);
		goto out;
	}

	if (res.type == RTN_LOCAL) {
		if (!fl4->saddr) {
			if (res.fi->fib_prefsrc)
				fl4->saddr = res.fi->fib_prefsrc;
			else
				fl4->saddr = fl4->daddr;
		}
		dev_out = net->loopback_dev;
		fl4->flowi4_oif = dev_out->ifindex;
		res.fi = NULL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
		fib_select_multipath(&res);
	else
#endif
	if (!res.prefixlen &&
	    res.table->tb_num_default > 1 &&
	    res.type == RTN_UNICAST && !fl4->flowi4_oif)
		fib_select_default(&res);

	if (!fl4->saddr)
		fl4->saddr = FIB_RES_PREFSRC(net, res);

	dev_out = FIB_RES_DEV(res);
	fl4->flowi4_oif = dev_out->ifindex;


make_route:
	rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);

out:
	rcu_read_unlock();
	return rth;
}
EXPORT_SYMBOL_GPL(__ip_route_output_key);
2065
/* ->check handler for blackhole routes: always returns NULL, i.e. a
 * cached blackhole entry is never reported as still-valid. */
static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}
2070
Steffen Klassertebb762f2011-11-23 02:12:51 +00002071static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002072{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002073 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2074
2075 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002076}
2077
/* ->update_pmtu for blackhole routes: deliberately a no-op, so PMTU
 * feedback is silently discarded. */
static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					  struct sk_buff *skb, u32 mtu)
{
}
2082
/* ->redirect for blackhole routes: deliberately a no-op, so ICMP
 * redirect notifications are silently discarded. */
static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				       struct sk_buff *skb)
{
}
2087
/* ->cow_metrics for blackhole routes: never hand out a writable
 * metrics array (always NULL). */
static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
					  unsigned long old)
{
	return NULL;
}
2093
/*
 * dst_ops for blackhole routes created by ipv4_blackhole_route().
 * All state-changing callbacks above are stubs, so such an entry never
 * revalidates and silently ignores PMTU/redirect updates.
 */
static struct dst_ops ipv4_dst_blackhole_ops = {
	.family			= AF_INET,
	.protocol		= cpu_to_be16(ETH_P_IP),
	.check			= ipv4_blackhole_dst_check,
	.mtu			= ipv4_blackhole_mtu,
	.default_advmss		= ipv4_default_advmss,
	.update_pmtu		= ipv4_rt_blackhole_update_pmtu,
	.redirect		= ipv4_rt_blackhole_redirect,
	.cow_metrics		= ipv4_rt_blackhole_cow_metrics,
	.neigh_lookup		= ipv4_neigh_lookup,
};
2105
/*
 * Clone @dst_orig into a "blackhole" route: the copy carries the
 * original's routing attributes, but its input/output handlers are
 * dst_discard, so traffic sent through it is dropped.
 *
 * Consumes a reference on @dst_orig (always released).  Returns the
 * new dst or ERR_PTR(-ENOMEM).
 */
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rtable *ort = (struct rtable *) dst_orig;
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		struct dst_entry *new = &rt->dst;

		new->__use = 1;
		/* Drop everything routed through this entry. */
		new->input = dst_discard;
		new->output = dst_discard;

		new->dev = ort->dst.dev;
		if (new->dev)
			dev_hold(new->dev);

		/* Mirror the original route's identity/attributes. */
		rt->rt_is_input = ort->rt_is_input;
		rt->rt_iif = ort->rt_iif;
		rt->rt_pmtu = ort->rt_pmtu;

		rt->rt_genid = rt_genid(net);
		rt->rt_flags = ort->rt_flags;
		rt->rt_type = ort->rt_type;
		rt->rt_gateway = ort->rt_gateway;

		INIT_LIST_HEAD(&rt->rt_uncached);

		/* NOTE(review): dst_free() here presumably marks the entry
		 * for dst-subsystem-managed teardown rather than freeing it
		 * immediately — the entry is still returned below; confirm
		 * against dst_alloc()/dst_free() semantics. */
		dst_free(new);
	}

	dst_release(dst_orig);

	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
}
2141
David S. Miller9d6ec932011-03-12 01:12:47 -05002142struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
David S. Millerb23dd4f2011-03-02 14:31:35 -08002143 struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002144{
David S. Miller9d6ec932011-03-12 01:12:47 -05002145 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002146
David S. Millerb23dd4f2011-03-02 14:31:35 -08002147 if (IS_ERR(rt))
2148 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002149
David S. Miller56157872011-05-02 14:37:45 -07002150 if (flp4->flowi4_proto)
David S. Miller9d6ec932011-03-12 01:12:47 -05002151 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2152 flowi4_to_flowi(flp4),
2153 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154
David S. Millerb23dd4f2011-03-02 14:31:35 -08002155 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002156}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002157EXPORT_SYMBOL_GPL(ip_route_output_flow);
2158
/*
 * Fill @skb with an RTM_NEWROUTE-style netlink message describing the
 * route attached to @skb (skb_rtable()), keyed by the original lookup
 * parameters @dst/@src/@fl4.
 *
 * Returns the result of nlmsg_end() on success, -EMSGSIZE when the
 * message does not fit (the partial message is cancelled).
 * NOTE(review): @nowait is currently unused in this body.
 */
static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
			struct flowi4 *fl4, struct sk_buff *skb, u32 pid,
			u32 seq, int event, int nowait, unsigned int flags)
{
	struct rtable *rt = skb_rtable(skb);
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	unsigned long expires = 0;
	u32 error;
	u32 metrics[RTAX_MAX];

	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	r->rtm_family	 = AF_INET;
	r->rtm_dst_len	= 32;
	r->rtm_src_len	= 0;
	r->rtm_tos	= fl4->flowi4_tos;
	r->rtm_table	= RT_TABLE_MAIN;
	if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
		goto nla_put_failure;
	r->rtm_type	= rt->rt_type;
	r->rtm_scope	= RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;

	if (nla_put_be32(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (src) {
		r->rtm_src_len = 32;
		if (nla_put_be32(skb, RTA_SRC, src))
			goto nla_put_failure;
	}
	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (rt->dst.tclassid &&
	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
		goto nla_put_failure;
#endif
	/* Preferred source only makes sense for output routes, and only
	 * when it differs from the requested source address. */
	if (!rt_is_input_route(rt) &&
	    fl4->saddr != src) {
		if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
			goto nla_put_failure;
	}
	if (rt->rt_gateway &&
	    nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
		goto nla_put_failure;

	/* Report metrics, with a cached PMTU overriding the MTU metric. */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (fl4->flowi4_mark &&
	    nla_put_be32(skb, RTA_MARK, fl4->flowi4_mark))
		goto nla_put_failure;

	error = rt->dst.error;
	/* Convert the absolute expiry time to a remaining interval;
	 * already-expired entries report 0. */
	expires = rt->dst.expires;
	if (expires) {
		if (time_before(jiffies, expires))
			expires -= jiffies;
		else
			expires = 0;
	}

	if (rt_is_input_route(rt)) {
		if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
			goto nla_put_failure;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2246
/*
 * RTM_GETROUTE handler: resolve a single route for the dst/src/tos/
 * oif/mark given in the netlink request and unicast the answer back
 * to the requester.
 *
 * If RTA_IIF is supplied the request is resolved as an *input* route
 * via ip_route_input() on a dummy skb; otherwise as an output route
 * via ip_route_output_key().
 *
 * Returns 0 on success or a negative errno.
 */
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be32 dst = 0;
	__be32 src = 0;
	u32 iif;
	int err;
	int mark;
	struct sk_buff *skb;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	rtm = nlmsg_data(nlh);

	/* This skb doubles as a fake input packet (for the iif case) and
	 * as the reply message buffer filled by rt_fill_info(). */
	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);

	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
	ip_hdr(skb)->protocol = IPPROTO_ICMP;
	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));

	/* All attributes are optional; missing ones default to 0. */
	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
	dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst;
	fl4.saddr = src;
	fl4.flowi4_tos = rtm->rtm_tos;
	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
	fl4.flowi4_mark = mark;

	if (iif) {
		struct net_device *dev;

		dev = __dev_get_by_index(net, iif);
		if (dev == NULL) {
			err = -ENODEV;
			goto errout_free;
		}

		skb->protocol	= htons(ETH_P_IP);
		skb->dev	= dev;
		skb->mark	= mark;
		local_bh_disable();
		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
		local_bh_enable();

		rt = skb_rtable(skb);
		/* A successful lookup may still attach an error route. */
		if (err == 0 && rt->dst.error)
			err = -rt->dst.error;
	} else {
		rt = ip_route_output_key(net, &fl4);

		err = 0;
		if (IS_ERR(rt))
			err = PTR_ERR(rt);
	}

	if (err)
		goto errout_free;

	skb_dst_set(skb, &rt->dst);
	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	err = rt_fill_info(net, dst, src, &fl4, skb,
			   NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
			   RTM_NEWROUTE, 0, 0);
	if (err <= 0)
		goto errout_free;

	/* rtnl_unicast() consumes skb, so no free on this path. */
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout:
	return err;

errout_free:
	kfree_skb(skb);
	goto errout;
}
2343
/* RTM_GETROUTE dump callback: emits no entries, just returns the
 * current skb length so the netlink dump terminates cleanly. */
int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
{
	return skb->len;
}
2348
/* Multicast configuration changed on @in_dev: flush the route cache
 * of the owning namespace (delay 0 = immediate). */
void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(dev_net(in_dev->dev), 0);
}
2353
2354#ifdef CONFIG_SYSCTL
Denis V. Lunev81c684d2008-07-08 03:05:28 -07002355static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002356 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002357 size_t *lenp, loff_t *ppos)
2358{
2359 if (write) {
Denis V. Lunev639e1042008-07-05 19:02:06 -07002360 int flush_delay;
Denis V. Lunev81c684d2008-07-08 03:05:28 -07002361 ctl_table ctl;
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002362 struct net *net;
Denis V. Lunev639e1042008-07-05 19:02:06 -07002363
Denis V. Lunev81c684d2008-07-08 03:05:28 -07002364 memcpy(&ctl, __ctl, sizeof(ctl));
2365 ctl.data = &flush_delay;
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002366 proc_dointvec(&ctl, write, buffer, lenp, ppos);
Denis V. Lunev639e1042008-07-05 19:02:06 -07002367
Denis V. Lunev81c684d2008-07-08 03:05:28 -07002368 net = (struct net *)__ctl->extra1;
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002369 rt_cache_flush(net, flush_delay);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002370 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002371 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002372
2373 return -EINVAL;
2374}
2375
/*
 * Global (not per-netns) tunables exposed under /proc/sys/net/ipv4/route.
 * Entries whose handler is proc_dointvec_jiffies store jiffies but are
 * read/written in seconds; *_ms_jiffies variants use milliseconds.
 */
static ctl_table ipv4_route_table[] = {
	{
		.procname	= "gc_thresh",
		.data		= &ipv4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &ip_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname	= "gc_min_interval",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		/* Same variable as above, but in milliseconds. */
		.procname	= "gc_min_interval_ms",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &ip_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &ip_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "redirect_load",
		.data		= &ip_rt_redirect_load,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_number",
		.data		= &ip_rt_redirect_number,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_silence",
		.data		= &ip_rt_redirect_silence,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_cost",
		.data		= &ip_rt_error_cost,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_burst",
		.data		= &ip_rt_error_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &ip_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002486
/*
 * Per-netns template for /proc/sys/net/ipv4/route/flush (write-only).
 * ->extra1 is filled with the owning struct net in
 * sysctl_route_net_init(); non-init namespaces get a kmemdup'd copy.
 */
static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname	= "flush",
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv4_sysctl_rtcache_flush,
	},
	{ },
};
2496
2497static __net_init int sysctl_route_net_init(struct net *net)
2498{
2499 struct ctl_table *tbl;
2500
2501 tbl = ipv4_route_flush_table;
Octavian Purdila09ad9bc2009-11-25 15:14:13 -08002502 if (!net_eq(net, &init_net)) {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002503 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
2504 if (tbl == NULL)
2505 goto err_dup;
2506 }
2507 tbl[0].extra1 = net;
2508
Eric W. Biedermanec8f23c2012-04-19 13:44:49 +00002509 net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002510 if (net->ipv4.route_hdr == NULL)
2511 goto err_reg;
2512 return 0;
2513
2514err_reg:
2515 if (tbl != ipv4_route_flush_table)
2516 kfree(tbl);
2517err_dup:
2518 return -ENOMEM;
2519}
2520
/* Per-netns teardown: unregister the route sysctls and free the
 * duplicated table.  The static init_net table must never reach the
 * kfree() below, hence the BUG_ON. */
static __net_exit void sysctl_route_net_exit(struct net *net)
{
	struct ctl_table *tbl;

	tbl = net->ipv4.route_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.route_hdr);
	BUG_ON(tbl == ipv4_route_flush_table);
	kfree(tbl);
}
2530
/* pernet hooks for the per-namespace route sysctl registration. */
static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002535#endif
2536
/* Per-netns init: seed both generation counters (route genid and
 * device-address genid) with random values. */
static __net_init int rt_genid_init(struct net *net)
{
	get_random_bytes(&net->ipv4.rt_genid,
			 sizeof(net->ipv4.rt_genid));
	get_random_bytes(&net->ipv4.dev_addr_genid,
			 sizeof(net->ipv4.dev_addr_genid));
	return 0;
}
2545
/* pernet hook for genid seeding; no exit needed (plain integers). */
static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};
2549
/* Per-netns init: allocate and initialize the IPv4 inet_peer base.
 * Returns 0 or -ENOMEM. */
static int __net_init ipv4_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv4.peers = bp;
	return 0;
}
2560
/* Per-netns teardown: detach, invalidate and free the inet_peer base.
 * The pointer is cleared before the tree is torn down. */
static void __net_exit ipv4_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv4.peers;

	net->ipv4.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}
2569
/* pernet hooks for the per-namespace inet_peer base. */
static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init	=	ipv4_inetpeer_init,
	.exit	=	ipv4_inetpeer_exit,
};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002574
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-cpu route classid accounting buffer; allocated in ip_rt_init(). */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002578
Linus Torvalds1da177e2005-04-16 15:20:36 -07002579int __init ip_rt_init(void)
2580{
Eric Dumazet424c4b72005-07-05 14:58:19 -07002581 int rc = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002582
Patrick McHardyc7066f72011-01-14 13:36:42 +01002583#ifdef CONFIG_IP_ROUTE_CLASSID
Ingo Molnar0dcec8c2009-02-25 14:07:33 +01002584 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002585 if (!ip_rt_acct)
2586 panic("IP: failed to allocate ip_rt_acct\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002587#endif
2588
Alexey Dobriyane5d679f2006-08-26 19:25:52 -07002589 ipv4_dst_ops.kmem_cachep =
2590 kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
Paul Mundt20c2df82007-07-20 10:11:58 +09002591 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002592
David S. Miller14e50e52007-05-24 18:17:54 -07002593 ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
2594
Eric Dumazetfc66f952010-10-08 06:37:34 +00002595 if (dst_entries_init(&ipv4_dst_ops) < 0)
2596 panic("IP: failed to allocate ipv4_dst_ops counter\n");
2597
2598 if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
2599 panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
2600
David S. Miller89aef892012-07-17 11:00:09 -07002601 ipv4_dst_ops.gc_thresh = ~0;
2602 ip_rt_max_size = INT_MAX;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002603
Linus Torvalds1da177e2005-04-16 15:20:36 -07002604 devinet_init();
2605 ip_fib_init();
2606
Denis V. Lunev73b38712008-02-28 20:51:18 -08002607 if (ip_rt_proc_init())
Joe Perches058bd4d2012-03-11 18:36:11 +00002608 pr_err("Unable to create route proc files\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002609#ifdef CONFIG_XFRM
2610 xfrm_init();
Neil Hormana33bc5c2009-07-30 18:52:15 -07002611 xfrm4_init(ip_rt_max_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002612#endif
Greg Rosec7ac8672011-06-10 01:27:09 +00002613 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
Thomas Graf63f34442007-03-22 11:55:17 -07002614
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002615#ifdef CONFIG_SYSCTL
2616 register_pernet_subsys(&sysctl_route_ops);
2617#endif
Neil Horman3ee94372010-05-08 01:57:52 -07002618 register_pernet_subsys(&rt_genid_ops);
David S. Millerc3426b42012-06-09 16:27:05 -07002619 register_pernet_subsys(&ipv4_inetpeer_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002620 return rc;
2621}
2622
Al Viroa1bc6eb2008-07-30 06:32:52 -04002623#ifdef CONFIG_SYSCTL
/*
 * Register the global (init_net) net/ipv4/route sysctl table early.
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
void __init ip_static_sysctl_init(void)
{
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
Al Viroa1bc6eb2008-07-30 06:32:52 -04002632#endif