blob: 4a69a957872b6f24a98dc59a3515a79d1663f605 [file] [log] [blame]
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: FIB frontend.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
15
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080020#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/types.h>
22#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020030#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031#include <linux/netdevice.h>
Thomas Graf18237302006-08-04 23:04:54 -070032#include <linux/if_addr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/if_arp.h>
34#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <linux/init.h>
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070036#include <linux/list.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090037#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044#include <net/arp.h>
45#include <net/ip_fib.h>
Thomas Graf63f34442007-03-22 11:55:17 -070046#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070047
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#ifndef CONFIG_IP_MULTIPLE_TABLES
49
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -080050static int __net_init fib4_rules_init(struct net *net)
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080051{
Denis V. Lunev93456b62008-01-10 03:23:38 -080052 struct fib_table *local_table, *main_table;
53
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080054 local_table = fib_hash_table(RT_TABLE_LOCAL);
Denis V. Lunev93456b62008-01-10 03:23:38 -080055 if (local_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080056 return -ENOMEM;
57
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080058 main_table = fib_hash_table(RT_TABLE_MAIN);
Denis V. Lunev93456b62008-01-10 03:23:38 -080059 if (main_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080060 goto fail;
61
Denis V. Lunev93456b62008-01-10 03:23:38 -080062 hlist_add_head_rcu(&local_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080063 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
Denis V. Lunev93456b62008-01-10 03:23:38 -080064 hlist_add_head_rcu(&main_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080065 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080066 return 0;
67
68fail:
Denis V. Lunev93456b62008-01-10 03:23:38 -080069 kfree(local_table);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080070 return -ENOMEM;
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080071}
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#else
73
Denis V. Lunev8ad49422008-01-10 03:24:11 -080074struct fib_table *fib_new_table(struct net *net, u32 id)
Linus Torvalds1da177e2005-04-16 15:20:36 -070075{
76 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070077 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070078
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070079 if (id == 0)
80 id = RT_TABLE_MAIN;
Denis V. Lunev8ad49422008-01-10 03:24:11 -080081 tb = fib_get_table(net, id);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070082 if (tb)
83 return tb;
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080084
85 tb = fib_hash_table(id);
Linus Torvalds1da177e2005-04-16 15:20:36 -070086 if (!tb)
87 return NULL;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070088 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080089 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -070090 return tb;
91}
92
Denis V. Lunev8ad49422008-01-10 03:24:11 -080093struct fib_table *fib_get_table(struct net *net, u32 id)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070094{
95 struct fib_table *tb;
96 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080097 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070098 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070099
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700100 if (id == 0)
101 id = RT_TABLE_MAIN;
102 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800103
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700104 rcu_read_lock();
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800105 head = &net->ipv4.fib_table_hash[h];
106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700107 if (tb->tb_id == id) {
108 rcu_read_unlock();
109 return tb;
110 }
111 }
112 rcu_read_unlock();
113 return NULL;
114}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116
Denis V. Lunev010278e2008-01-22 22:04:04 -0800117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
Denis V. Lunev010278e2008-01-22 22:04:04 -0800127 tb = fib_get_table(net, table);
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000129 fib_table_select_default(tb, flp, res);
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800130}
131
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800132static void fib_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133{
134 int flushed = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700136 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800137 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700138 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800141 head = &net->ipv4.fib_table_hash[h];
142 hlist_for_each_entry(tb, node, head, tb_hlist)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000143 flushed += fib_table_flush(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145
146 if (flushed)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700147 rt_cache_flush(net, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148}
149
Eric Dumazet82efee12010-09-30 03:31:56 +0000150/**
151 * __ip_dev_find - find the first device with a given source address.
152 * @net: the net namespace
153 * @addr: the source address
154 * @devref: if true, take a reference on the found device
155 *
156 * If a caller uses devref=false, it should be protected by RCU
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157 */
Eric Dumazet82efee12010-09-30 03:31:56 +0000158struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159{
Eric Dumazet82efee12010-09-30 03:31:56 +0000160 struct flowi fl = {
161 .nl_u = {
162 .ip4_u = {
163 .daddr = addr
164 }
165 },
166 .flags = FLOWI_FLAG_MATCH_ANY_IIF
167 };
168 struct fib_result res = { 0 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169 struct net_device *dev = NULL;
170
Tom Herbert4465b462010-05-23 19:54:12 +0000171 if (fib_lookup(net, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172 return NULL;
173 if (res.type != RTN_LOCAL)
174 goto out;
175 dev = FIB_RES_DEV(res);
176
Eric Dumazet82efee12010-09-30 03:31:56 +0000177 if (dev && devref)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178 dev_hold(dev);
179out:
180 fib_res_put(&res);
181 return dev;
182}
Eric Dumazet82efee12010-09-30 03:31:56 +0000183EXPORT_SYMBOL(__ip_dev_find);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700184
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800185/*
186 * Find address type as if only "dev" was present in the system. If
187 * on_dev is NULL then all interfaces are taken into consideration.
188 */
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800189static inline unsigned __inet_dev_addr_type(struct net *net,
190 const struct net_device *dev,
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800191 __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700192{
193 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
194 struct fib_result res;
195 unsigned ret = RTN_BROADCAST;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700196 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197
Jan Engelhardt1e637c72008-01-21 03:18:08 -0800198 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199 return RTN_BROADCAST;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800200 if (ipv4_is_multicast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201 return RTN_MULTICAST;
202
203#ifdef CONFIG_IP_MULTIPLE_TABLES
204 res.r = NULL;
205#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900206
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800207 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700208 if (local_table) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209 ret = RTN_UNICAST;
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000210 if (!fib_table_lookup(local_table, &fl, &res)) {
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800211 if (!dev || dev == res.fi->fib_dev)
212 ret = res.type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 fib_res_put(&res);
214 }
215 }
216 return ret;
217}
218
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800219unsigned int inet_addr_type(struct net *net, __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800220{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800221 return __inet_dev_addr_type(net, NULL, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800222}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000223EXPORT_SYMBOL(inet_addr_type);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800224
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800225unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
226 __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800227{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800228 return __inet_dev_addr_type(net, dev, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800229}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000230EXPORT_SYMBOL(inet_dev_addr_type);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800231
/* Given (packet source, input interface) and optional (dst, oif, tos):
 * - (main) check, that source is valid i.e. not broadcast or our local
 *   address.
 * - figure out what "logical" interface this packet arrived
 *   and calculate "specific destination" address.
 * - check, that packet arrived from expected physical interface.
 *
 * The check is a reverse lookup: the flow is built with the packet's
 * source as daddr, its destination as saddr and @oif as iif.
 *
 * On success *spec_dst is written and the return value is 1 when the
 * matched next hop has scope >= RT_SCOPE_HOST, 0 otherwise.  *itag is
 * updated through fib_combine_itag() on the first successful lookup and
 * zeroed on the last_resort path.
 * Returns -EINVAL when @dev has no in_device or the route type is not
 * acceptable, and -EXDEV when the rp_filter setting rejects the packet.
 */

int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
			struct net_device *dev, __be32 *spec_dst,
			u32 *itag, u32 mark)
{
	struct in_device *in_dev;
	/* Reverse flow: look up a route back towards the packet's source. */
	struct flowi fl = { .nl_u = { .ip4_u =
				    { .daddr = src,
				      .saddr = dst,
				      .tos = tos } },
			    .mark = mark,
			    .iif = oif };

	struct fib_result res;
	int no_addr, rpf, accept_local;
	bool dev_match;
	int ret;
	struct net *net;

	/* Snapshot the per-device settings under RCU. */
	no_addr = rpf = accept_local = 0;
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev) {
		no_addr = in_dev->ifa_list == NULL;
		rpf = IN_DEV_RPFILTER(in_dev);
		accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
		/* Ignore the mark unless the device opts in via SRC_VMARK. */
		if (mark && !IN_DEV_SRC_VMARK(in_dev))
			fl.mark = 0;
	}
	rcu_read_unlock();

	/* Only the pointer value is tested here; it is not dereferenced. */
	if (in_dev == NULL)
		goto e_inval;

	net = dev_net(dev);
	if (fib_lookup(net, &fl, &res))
		goto last_resort;
	/* Only unicast routes are accepted, or local ones if accept_local. */
	if (res.type != RTN_UNICAST) {
		if (res.type != RTN_LOCAL || !accept_local)
			goto e_inval_res;
	}
	*spec_dst = FIB_RES_PREFSRC(res);
	fib_combine_itag(itag, &res);
	dev_match = false;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	/* Any next hop of the route leaving via @dev counts as a match;
	 * ret is only used as the loop index here.
	 */
	for (ret = 0; ret < res.fi->fib_nhs; ret++) {
		struct fib_nh *nh = &res.fi->fib_nh[ret];

		if (nh->nh_dev == dev) {
			dev_match = true;
			break;
		}
	}
#else
	if (FIB_RES_DEV(res) == dev)
		dev_match = true;
#endif
	if (dev_match) {
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		fib_res_put(&res);
		return ret;
	}
	fib_res_put(&res);
	if (no_addr)
		goto last_resort;
	/* An rp_filter value of exactly 1 rejects the packet at this point;
	 * other values fall through to a second lookup bound to @dev.
	 */
	if (rpf == 1)
		goto e_rpf;
	fl.oif = dev->ifindex;

	ret = 0;
	if (fib_lookup(net, &fl, &res) == 0) {
		if (res.type == RTN_UNICAST) {
			*spec_dst = FIB_RES_PREFSRC(res);
			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		}
		fib_res_put(&res);
	}
	return ret;

last_resort:
	/* Reached when the first lookup failed or the device has no
	 * addresses; any non-zero rp_filter value rejects the packet here.
	 */
	if (rpf)
		goto e_rpf;
	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	*itag = 0;
	return 0;

e_inval_res:
	fib_res_put(&res);
e_inval:
	return -EINVAL;
e_rpf:
	return -EXDEV;
}
333
Al Viro81f7bf62006-09-27 18:40:00 -0700334static inline __be32 sk_extract_addr(struct sockaddr *addr)
Thomas Graf4e902c52006-08-17 18:14:52 -0700335{
336 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
337}
338
339static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
340{
341 struct nlattr *nla;
342
343 nla = (struct nlattr *) ((char *) mx + len);
344 nla->nla_type = type;
345 nla->nla_len = nla_attr_size(4);
346 *(u32 *) nla_data(nla) = value;
347
348 return len + nla_total_size(4);
349}
350
/*
 * Translate a legacy ioctl route request (struct rtentry, as used by
 * SIOCADDRT/SIOCDELRT) into a struct fib_config.
 *
 * @net: namespace the request applies to
 * @cmd: SIOCADDRT or SIOCDELRT; a delete skips the gateway sanity check
 *       and the metrics conversion
 * @rt:  kernel copy of the request; rt->rt_dev is still a user pointer
 * @cfg: output, fully zeroed before being filled in
 *
 * Returns 0 on success or a negative errno (-EAFNOSUPPORT, -EINVAL,
 * -EFAULT, -ENODEV, -ENOMEM).  On success cfg->fc_mx may point to a
 * kzalloc()ed attribute buffer; ip_rt_ioctl() frees it after use.
 * The allocation is the last step that can fail, so nothing is leaked
 * on the error paths.
 */
static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
				 struct fib_config *cfg)
{
	__be32 addr;
	int plen;

	memset(cfg, 0, sizeof(*cfg));
	cfg->fc_nlinfo.nl_net = net;

	if (rt->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 * Check mask for validity:
	 * a) it must be contiguous.
	 * b) destination must have all host bits clear.
	 * c) if application forgot to set correct family (AF_INET),
	 *    reject request unless it is absolutely clear i.e.
	 *    both family and mask are zero.
	 */
	plen = 32;	/* host route unless a mask is supplied below */
	addr = sk_extract_addr(&rt->rt_dst);
	if (!(rt->rt_flags & RTF_HOST)) {
		__be32 mask = sk_extract_addr(&rt->rt_genmask);

		if (rt->rt_genmask.sa_family != AF_INET) {
			if (mask || rt->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}

		if (bad_mask(mask, addr))
			return -EINVAL;

		plen = inet_mask_len(mask);
	}

	cfg->fc_dst_len = plen;
	cfg->fc_dst = addr;

	if (cmd != SIOCDELRT) {
		cfg->fc_nlflags = NLM_F_CREATE;
		cfg->fc_protocol = RTPROT_BOOT;
	}

	/* A non-zero ioctl metric is stored as priority metric - 1. */
	if (rt->rt_metric)
		cfg->fc_priority = rt->rt_metric - 1;

	/* Reject routes need no device, gateway or metrics. */
	if (rt->rt_flags & RTF_REJECT) {
		cfg->fc_scope = RT_SCOPE_HOST;
		cfg->fc_type = RTN_UNREACHABLE;
		return 0;
	}

	/* RT_SCOPE_NOWHERE acts as "not decided yet"; it is replaced below. */
	cfg->fc_scope = RT_SCOPE_NOWHERE;
	cfg->fc_type = RTN_UNICAST;

	if (rt->rt_dev) {
		char *colon;
		struct net_device *dev;
		char devname[IFNAMSIZ];

		/* Copy at most IFNAMSIZ-1 bytes so the name can be terminated. */
		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
			return -EFAULT;

		devname[IFNAMSIZ-1] = 0;
		/* "dev:label" form: look the device up by the part before ':' */
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(net, devname);
		if (!dev)
			return -ENODEV;
		cfg->fc_oif = dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get_rtnl(dev);
			if (!in_dev)
				return -ENODEV;
			/* Restore the full label and find the address carrying
			 * it; its local address becomes the preferred source.
			 */
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			cfg->fc_prefsrc = ifa->ifa_local;
		}
	}

	addr = sk_extract_addr(&rt->rt_gateway);
	if (rt->rt_gateway.sa_family == AF_INET && addr) {
		cfg->fc_gw = addr;
		if (rt->rt_flags & RTF_GATEWAY &&
		    inet_addr_type(net, addr) == RTN_UNICAST)
			cfg->fc_scope = RT_SCOPE_UNIVERSE;
	}

	/* Deletes need neither the gateway check nor the metrics. */
	if (cmd == SIOCDELRT)
		return 0;

	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
		return -EINVAL;

	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
		cfg->fc_scope = RT_SCOPE_LINK;

	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
		struct nlattr *mx;
		int len = 0;

		/* Room for up to three 4-byte attributes, one per flag. */
		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;

		/* NOTE(review): 40 is presumably the IPv4 + TCP header size
		 * (MTU -> advertised MSS); an rt_mtu below 40 is not checked.
		 */
		if (rt->rt_flags & RTF_MTU)
			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);

		if (rt->rt_flags & RTF_WINDOW)
			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);

		/* NOTE(review): the << 3 presumably converts to the scaled
		 * unit RTAX_RTT expects - confirm.
		 */
		if (rt->rt_flags & RTF_IRTT)
			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);

		cfg->fc_mx = mx;
		cfg->fc_mx_len = len;
	}

	return 0;
}
478
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479/*
480 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
481 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900482
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800483int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484{
Thomas Graf4e902c52006-08-17 18:14:52 -0700485 struct fib_config cfg;
486 struct rtentry rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700488
489 switch (cmd) {
490 case SIOCADDRT: /* Add a route */
491 case SIOCDELRT: /* Delete a route */
492 if (!capable(CAP_NET_ADMIN))
493 return -EPERM;
Thomas Graf4e902c52006-08-17 18:14:52 -0700494
495 if (copy_from_user(&rt, arg, sizeof(rt)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496 return -EFAULT;
Thomas Graf4e902c52006-08-17 18:14:52 -0700497
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498 rtnl_lock();
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800499 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500 if (err == 0) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700501 struct fib_table *tb;
502
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503 if (cmd == SIOCDELRT) {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800504 tb = fib_get_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505 if (tb)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000506 err = fib_table_delete(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700507 else
508 err = -ESRCH;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509 } else {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800510 tb = fib_new_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700511 if (tb)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000512 err = fib_table_insert(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700513 else
514 err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700516
517 /* allocated by rtentry_to_fib_config() */
518 kfree(cfg.fc_mx);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519 }
520 rtnl_unlock();
521 return err;
522 }
523 return -EINVAL;
524}
525
Patrick McHardyef7c79e2007-06-05 12:38:30 -0700526const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
Thomas Graf4e902c52006-08-17 18:14:52 -0700527 [RTA_DST] = { .type = NLA_U32 },
528 [RTA_SRC] = { .type = NLA_U32 },
529 [RTA_IIF] = { .type = NLA_U32 },
530 [RTA_OIF] = { .type = NLA_U32 },
531 [RTA_GATEWAY] = { .type = NLA_U32 },
532 [RTA_PRIORITY] = { .type = NLA_U32 },
533 [RTA_PREFSRC] = { .type = NLA_U32 },
534 [RTA_METRICS] = { .type = NLA_NESTED },
Thomas Graf5176f912006-08-26 20:13:18 -0700535 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Thomas Graf4e902c52006-08-17 18:14:52 -0700536 [RTA_FLOW] = { .type = NLA_U32 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700537};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800539static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
540 struct nlmsghdr *nlh, struct fib_config *cfg)
Thomas Graf4e902c52006-08-17 18:14:52 -0700541{
542 struct nlattr *attr;
543 int err, remaining;
544 struct rtmsg *rtm;
545
546 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
547 if (err < 0)
548 goto errout;
549
550 memset(cfg, 0, sizeof(*cfg));
551
552 rtm = nlmsg_data(nlh);
Thomas Graf4e902c52006-08-17 18:14:52 -0700553 cfg->fc_dst_len = rtm->rtm_dst_len;
Thomas Graf4e902c52006-08-17 18:14:52 -0700554 cfg->fc_tos = rtm->rtm_tos;
555 cfg->fc_table = rtm->rtm_table;
556 cfg->fc_protocol = rtm->rtm_protocol;
557 cfg->fc_scope = rtm->rtm_scope;
558 cfg->fc_type = rtm->rtm_type;
559 cfg->fc_flags = rtm->rtm_flags;
560 cfg->fc_nlflags = nlh->nlmsg_flags;
561
562 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
563 cfg->fc_nlinfo.nlh = nlh;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800564 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700565
Thomas Grafa0ee18b2007-03-24 20:32:54 -0700566 if (cfg->fc_type > RTN_MAX) {
567 err = -EINVAL;
568 goto errout;
569 }
570
Thomas Graf4e902c52006-08-17 18:14:52 -0700571 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200572 switch (nla_type(attr)) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700573 case RTA_DST:
Al Viro17fb2c62006-09-26 22:15:25 -0700574 cfg->fc_dst = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700575 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700576 case RTA_OIF:
577 cfg->fc_oif = nla_get_u32(attr);
578 break;
579 case RTA_GATEWAY:
Al Viro17fb2c62006-09-26 22:15:25 -0700580 cfg->fc_gw = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700581 break;
582 case RTA_PRIORITY:
583 cfg->fc_priority = nla_get_u32(attr);
584 break;
585 case RTA_PREFSRC:
Al Viro17fb2c62006-09-26 22:15:25 -0700586 cfg->fc_prefsrc = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700587 break;
588 case RTA_METRICS:
589 cfg->fc_mx = nla_data(attr);
590 cfg->fc_mx_len = nla_len(attr);
591 break;
592 case RTA_MULTIPATH:
593 cfg->fc_mp = nla_data(attr);
594 cfg->fc_mp_len = nla_len(attr);
595 break;
596 case RTA_FLOW:
597 cfg->fc_flow = nla_get_u32(attr);
598 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700599 case RTA_TABLE:
600 cfg->fc_table = nla_get_u32(attr);
601 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602 }
603 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700604
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700606errout:
607 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608}
609
Jianjun Kong6ed25332008-11-03 00:25:16 -0800610static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900612 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700613 struct fib_config cfg;
614 struct fib_table *tb;
615 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800617 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700618 if (err < 0)
619 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800621 tb = fib_get_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700622 if (tb == NULL) {
623 err = -ESRCH;
624 goto errout;
625 }
626
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000627 err = fib_table_delete(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700628errout:
629 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630}
631
Jianjun Kong6ed25332008-11-03 00:25:16 -0800632static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900634 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700635 struct fib_config cfg;
636 struct fib_table *tb;
637 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800639 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700640 if (err < 0)
641 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642
Denis V. Lunev226b0b4a52008-01-10 03:30:24 -0800643 tb = fib_new_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700644 if (tb == NULL) {
645 err = -ENOBUFS;
646 goto errout;
647 }
648
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000649 err = fib_table_insert(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700650errout:
651 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652}
653
/*
 * Netlink dump callback that walks all FIB tables of the namespace.
 *
 * A request whose rtmsg header carries RTM_F_CLONED is handed to
 * ip_rt_dump() instead.  Resume state lives in cb->args: args[0] is the
 * hash bucket and args[1] the index of the table within that bucket at
 * which to continue; args[2] onwards are cleared before each table
 * after the first one dumped in this call (presumably per-table state
 * used by fib_table_dump() - confirm).
 *
 * Returns skb->len.
 */
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	unsigned int h, s_h;
	unsigned int e = 0, s_e;
	struct fib_table *tb;
	struct hlist_node *node;
	struct hlist_head *head;
	int dumped = 0;

	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
		return ip_rt_dump(skb, cb);

	/* Where the previous call stopped. */
	s_h = cb->args[0];
	s_e = cb->args[1];

	/* s_e only applies to the first bucket; later buckets start at 0. */
	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
		e = 0;
		head = &net->ipv4.fib_table_hash[h];
		hlist_for_each_entry(tb, node, head, tb_hlist) {
			/* Skip tables already handled by an earlier call. */
			if (e < s_e)
				goto next;
			if (dumped)
				memset(&cb->args[2], 0, sizeof(cb->args) -
						 2 * sizeof(cb->args[0]));
			/* On failure, stop and record h/e so the next call
			 * resumes at this same table.
			 */
			if (fib_table_dump(tb, skb, cb) < 0)
				goto out;
			dumped = 1;
next:
			e++;
		}
	}
out:
	cb->args[1] = e;
	cb->args[0] = h;

	return skb->len;
}
693
694/* Prepare and feed intra-kernel routing request.
695 Really, it should be netlink message, but :-( netlink
696 can be not configured, so that we feed it directly
697 to fib engine. It is legal, because all events occur
698 only when netlink is already locked.
699 */
700
Al Viro81f7bf62006-09-27 18:40:00 -0700701static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900703 struct net *net = dev_net(ifa->ifa_dev->dev);
Thomas Graf4e902c52006-08-17 18:14:52 -0700704 struct fib_table *tb;
705 struct fib_config cfg = {
706 .fc_protocol = RTPROT_KERNEL,
707 .fc_type = type,
708 .fc_dst = dst,
709 .fc_dst_len = dst_len,
710 .fc_prefsrc = ifa->ifa_local,
711 .fc_oif = ifa->ifa_dev->dev->ifindex,
712 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800713 .fc_nlinfo = {
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800714 .nl_net = net,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800715 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700716 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717
718 if (type == RTN_UNICAST)
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800719 tb = fib_new_table(net, RT_TABLE_MAIN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720 else
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800721 tb = fib_new_table(net, RT_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722
723 if (tb == NULL)
724 return;
725
Thomas Graf4e902c52006-08-17 18:14:52 -0700726 cfg.fc_table = tb->tb_id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727
Thomas Graf4e902c52006-08-17 18:14:52 -0700728 if (type != RTN_LOCAL)
729 cfg.fc_scope = RT_SCOPE_LINK;
730 else
731 cfg.fc_scope = RT_SCOPE_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732
733 if (cmd == RTM_NEWROUTE)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000734 fib_table_insert(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735 else
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000736 fib_table_delete(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737}
738
Jamal Hadi Salim0ff60a42005-11-22 14:47:37 -0800739void fib_add_ifaddr(struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740{
741 struct in_device *in_dev = ifa->ifa_dev;
742 struct net_device *dev = in_dev->dev;
743 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700744 __be32 mask = ifa->ifa_mask;
745 __be32 addr = ifa->ifa_local;
746 __be32 prefix = ifa->ifa_address&mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747
748 if (ifa->ifa_flags&IFA_F_SECONDARY) {
749 prim = inet_ifa_byprefix(in_dev, prefix, mask);
750 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800751 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 return;
753 }
754 }
755
756 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
757
758 if (!(dev->flags&IFF_UP))
759 return;
760
761 /* Add broadcast address, if it is explicitly assigned. */
Al Viroa144ea42006-09-28 18:00:55 -0700762 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
764
Joe Perchesf97c1e02007-12-16 13:45:43 -0800765 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766 (prefix != addr || ifa->ifa_prefixlen < 32)) {
767 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
768 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
769
770 /* Add network specific broadcasts, when it takes a sense */
771 if (ifa->ifa_prefixlen < 31) {
772 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
773 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
774 }
775 }
776}
777
778static void fib_del_ifaddr(struct in_ifaddr *ifa)
779{
780 struct in_device *in_dev = ifa->ifa_dev;
781 struct net_device *dev = in_dev->dev;
782 struct in_ifaddr *ifa1;
783 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700784 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
785 __be32 any = ifa->ifa_address&ifa->ifa_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786#define LOCAL_OK 1
787#define BRD_OK 2
788#define BRD0_OK 4
789#define BRD1_OK 8
790 unsigned ok = 0;
791
792 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
793 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
794 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
795 else {
796 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
797 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800798 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799 return;
800 }
801 }
802
803 /* Deletion is more complicated than add.
804 We should take care of not to delete too much :-)
805
806 Scan address list to be sure that addresses are really gone.
807 */
808
809 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
810 if (ifa->ifa_local == ifa1->ifa_local)
811 ok |= LOCAL_OK;
812 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
813 ok |= BRD_OK;
814 if (brd == ifa1->ifa_broadcast)
815 ok |= BRD1_OK;
816 if (any == ifa1->ifa_broadcast)
817 ok |= BRD0_OK;
818 }
819
820 if (!(ok&BRD_OK))
821 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
822 if (!(ok&BRD1_OK))
823 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
824 if (!(ok&BRD0_OK))
825 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
826 if (!(ok&LOCAL_OK)) {
827 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
828
829 /* Check, that this local address finally disappeared. */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900830 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700831 /* And the last, but not the least thing.
832 We must flush stray FIB entries.
833
834 First of all, we scan fib_info list searching
835 for stray nexthop entries, then ignite fib_flush.
836 */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900837 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
838 fib_flush(dev_net(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839 }
840 }
841#undef LOCAL_OK
842#undef BRD_OK
843#undef BRD0_OK
844#undef BRD1_OK
845}
846
Robert Olsson246955f2005-06-20 13:36:39 -0700847static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
848{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900849
Robert Olsson246955f2005-06-20 13:36:39 -0700850 struct fib_result res;
Thomas Graf5f3008932006-11-09 15:21:41 -0800851 struct flowi fl = { .mark = frn->fl_mark,
Thomas Graf47dcf0c2006-11-09 15:20:38 -0800852 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
Robert Olsson246955f2005-06-20 13:36:39 -0700853 .tos = frn->fl_tos,
854 .scope = frn->fl_scope } } };
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700855
Sergey Vlasov912a41a2007-04-27 02:17:19 -0700856#ifdef CONFIG_IP_MULTIPLE_TABLES
857 res.r = NULL;
858#endif
859
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700860 frn->err = -ENOENT;
Robert Olsson246955f2005-06-20 13:36:39 -0700861 if (tb) {
862 local_bh_disable();
863
864 frn->tb_id = tb->tb_id;
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000865 frn->err = fib_table_lookup(tb, &fl, &res);
Robert Olsson246955f2005-06-20 13:36:39 -0700866
867 if (!frn->err) {
868 frn->prefixlen = res.prefixlen;
869 frn->nh_sel = res.nh_sel;
870 frn->type = res.type;
871 frn->scope = res.scope;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700872 fib_res_put(&res);
Robert Olsson246955f2005-06-20 13:36:39 -0700873 }
874 local_bh_enable();
875 }
876}
877
David S. Miller28f7b0362007-10-10 21:32:39 -0700878static void nl_fib_input(struct sk_buff *skb)
Robert Olsson246955f2005-06-20 13:36:39 -0700879{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800880 struct net *net;
Robert Olsson246955f2005-06-20 13:36:39 -0700881 struct fib_result_nl *frn;
David S. Miller28f7b0362007-10-10 21:32:39 -0700882 struct nlmsghdr *nlh;
Robert Olsson246955f2005-06-20 13:36:39 -0700883 struct fib_table *tb;
David S. Miller28f7b0362007-10-10 21:32:39 -0700884 u32 pid;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700885
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900886 net = sock_net(skb->sk);
Arnaldo Carvalho de Melob529ccf2007-04-25 19:08:35 -0700887 nlh = nlmsg_hdr(skb);
Thomas Grafea865752005-12-01 14:30:00 -0800888 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
Denis V. Lunevd883a032007-12-21 02:01:53 -0800889 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
Thomas Grafea865752005-12-01 14:30:00 -0800890 return;
Denis V. Lunevd883a032007-12-21 02:01:53 -0800891
892 skb = skb_clone(skb, GFP_KERNEL);
893 if (skb == NULL)
894 return;
895 nlh = nlmsg_hdr(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900896
Robert Olsson246955f2005-06-20 13:36:39 -0700897 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800898 tb = fib_get_table(net, frn->tb_id_in);
Robert Olsson246955f2005-06-20 13:36:39 -0700899
900 nl_fib_lookup(frn, tb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900901
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700902 pid = NETLINK_CB(skb).pid; /* pid of sending process */
Robert Olsson246955f2005-06-20 13:36:39 -0700903 NETLINK_CB(skb).pid = 0; /* from kernel */
Patrick McHardyac6d4392005-08-14 19:29:52 -0700904 NETLINK_CB(skb).dst_group = 0; /* unicast */
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800905 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900906}
Robert Olsson246955f2005-06-20 13:36:39 -0700907
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +0000908static int __net_init nl_fib_lookup_init(struct net *net)
Robert Olsson246955f2005-06-20 13:36:39 -0700909{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800910 struct sock *sk;
911 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
912 nl_fib_input, NULL, THIS_MODULE);
913 if (sk == NULL)
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800914 return -EAFNOSUPPORT;
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800915 net->ipv4.fibnl = sk;
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800916 return 0;
917}
918
919static void nl_fib_lookup_exit(struct net *net)
920{
Denis V. Lunevb7c6ba62008-01-28 14:41:19 -0800921 netlink_kernel_release(net->ipv4.fibnl);
Denis V. Lunev775516b2008-01-18 23:55:19 -0800922 net->ipv4.fibnl = NULL;
Robert Olsson246955f2005-06-20 13:36:39 -0700923}
924
/*
 * Take IPv4 routing down on @dev: sync the FIB with the device going
 * down (flushing the FIB if that reports anything to flush), flush the
 * route cache with the given @delay, and bring ARP down on the device.
 *
 * @force is passed through to fib_sync_down_dev().
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	struct net *net = dev_net(dev);

	if (fib_sync_down_dev(dev, force))
		fib_flush(net);

	rt_cache_flush(net, delay);
	arp_ifdown(dev);
}
932
933static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
934{
Jianjun Kong6ed25332008-11-03 00:25:16 -0800935 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700936 struct net_device *dev = ifa->ifa_dev->dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937
938 switch (event) {
939 case NETDEV_UP:
940 fib_add_ifaddr(ifa);
941#ifdef CONFIG_IP_ROUTE_MULTIPATH
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700942 fib_sync_up(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700944 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945 break;
946 case NETDEV_DOWN:
947 fib_del_ifaddr(ifa);
Jayachandran C9fcc2e82005-10-27 15:10:01 -0700948 if (ifa->ifa_dev->ifa_list == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949 /* Last address was deleted from this interface.
950 Disable IP.
951 */
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000952 fib_disable_ip(dev, 1, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 } else {
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700954 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955 }
956 break;
957 }
958 return NOTIFY_DONE;
959}
960
961static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
962{
963 struct net_device *dev = ptr;
Herbert Xue5ed6392005-10-03 14:35:55 -0700964 struct in_device *in_dev = __in_dev_get_rtnl(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965
966 if (event == NETDEV_UNREGISTER) {
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000967 fib_disable_ip(dev, 2, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 return NOTIFY_DONE;
969 }
970
971 if (!in_dev)
972 return NOTIFY_DONE;
973
974 switch (event) {
975 case NETDEV_UP:
976 for_ifa(in_dev) {
977 fib_add_ifaddr(ifa);
978 } endfor_ifa(in_dev);
979#ifdef CONFIG_IP_ROUTE_MULTIPATH
980 fib_sync_up(dev);
981#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700982 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 break;
984 case NETDEV_DOWN:
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000985 fib_disable_ip(dev, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 break;
987 case NETDEV_CHANGEMTU:
988 case NETDEV_CHANGE:
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700989 rt_cache_flush(dev_net(dev), 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700990 break;
Eric W. Biedermana5ee1552009-11-29 15:45:58 +0000991 case NETDEV_UNREGISTER_BATCH:
992 rt_cache_flush_batch();
993 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994 }
995 return NOTIFY_DONE;
996}
997
998static struct notifier_block fib_inetaddr_notifier = {
Jianjun Kong6ed25332008-11-03 00:25:16 -0800999 .notifier_call = fib_inetaddr_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000};
1001
1002static struct notifier_block fib_netdev_notifier = {
Jianjun Kong6ed25332008-11-03 00:25:16 -08001003 .notifier_call = fib_netdev_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004};
1005
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001006static int __net_init ip_fib_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007{
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -08001008 int err;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001009 unsigned int i;
1010
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001011 net->ipv4.fib_table_hash = kzalloc(
1012 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
1013 if (net->ipv4.fib_table_hash == NULL)
1014 return -ENOMEM;
1015
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001016 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001017 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -08001018
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -08001019 err = fib4_rules_init(net);
1020 if (err < 0)
1021 goto fail;
1022 return 0;
1023
1024fail:
1025 kfree(net->ipv4.fib_table_hash);
1026 return err;
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001027}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001029static void ip_fib_net_exit(struct net *net)
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001030{
1031 unsigned int i;
Thomas Graf63f34442007-03-22 11:55:17 -07001032
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001033#ifdef CONFIG_IP_MULTIPLE_TABLES
1034 fib4_rules_exit(net);
1035#endif
1036
1037 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1038 struct fib_table *tb;
1039 struct hlist_head *head;
1040 struct hlist_node *node, *tmp;
1041
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001042 head = &net->ipv4.fib_table_hash[i];
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001043 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1044 hlist_del(node);
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001045 fib_table_flush(tb);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001046 kfree(tb);
1047 }
1048 }
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001049 kfree(net->ipv4.fib_table_hash);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001050}
1051
1052static int __net_init fib_net_init(struct net *net)
1053{
1054 int error;
1055
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001056 error = ip_fib_net_init(net);
1057 if (error < 0)
1058 goto out;
1059 error = nl_fib_lookup_init(net);
1060 if (error < 0)
1061 goto out_nlfl;
1062 error = fib_proc_init(net);
1063 if (error < 0)
1064 goto out_proc;
1065out:
1066 return error;
1067
1068out_proc:
1069 nl_fib_lookup_exit(net);
1070out_nlfl:
1071 ip_fib_net_exit(net);
1072 goto out;
1073}
1074
1075static void __net_exit fib_net_exit(struct net *net)
1076{
1077 fib_proc_exit(net);
1078 nl_fib_lookup_exit(net);
1079 ip_fib_net_exit(net);
1080}
1081
1082static struct pernet_operations fib_net_ops = {
1083 .init = fib_net_init,
1084 .exit = fib_net_exit,
1085};
1086
1087void __init ip_fib_init(void)
1088{
Thomas Graf63f34442007-03-22 11:55:17 -07001089 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1090 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1091 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001092
1093 register_pernet_subsys(&fib_net_ops);
1094 register_netdevice_notifier(&fib_netdev_notifier);
1095 register_inetaddr_notifier(&fib_inetaddr_notifier);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001096
1097 fib_hash_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098}