blob: 981f3c59b33412b8674b1974ca1b4877cde97511 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080020#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/types.h>
22#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020030#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031#include <linux/netdevice.h>
Thomas Graf18237302006-08-04 23:04:54 -070032#include <linux/if_addr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/if_arp.h>
34#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <linux/init.h>
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070036#include <linux/list.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090037#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044#include <net/arp.h>
45#include <net/ip_fib.h>
Thomas Graf63f34442007-03-22 11:55:17 -070046#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070047
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#ifndef CONFIG_IP_MULTIPLE_TABLES
49
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -080050static int __net_init fib4_rules_init(struct net *net)
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080051{
Denis V. Lunev93456b62008-01-10 03:23:38 -080052 struct fib_table *local_table, *main_table;
53
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080054 local_table = fib_hash_table(RT_TABLE_LOCAL);
Denis V. Lunev93456b62008-01-10 03:23:38 -080055 if (local_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080056 return -ENOMEM;
57
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080058 main_table = fib_hash_table(RT_TABLE_MAIN);
Denis V. Lunev93456b62008-01-10 03:23:38 -080059 if (main_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080060 goto fail;
61
Denis V. Lunev93456b62008-01-10 03:23:38 -080062 hlist_add_head_rcu(&local_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080063 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
Denis V. Lunev93456b62008-01-10 03:23:38 -080064 hlist_add_head_rcu(&main_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080065 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080066 return 0;
67
68fail:
Denis V. Lunev93456b62008-01-10 03:23:38 -080069 kfree(local_table);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080070 return -ENOMEM;
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080071}
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#else
73
Denis V. Lunev8ad49422008-01-10 03:24:11 -080074struct fib_table *fib_new_table(struct net *net, u32 id)
Linus Torvalds1da177e2005-04-16 15:20:36 -070075{
76 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070077 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070078
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070079 if (id == 0)
80 id = RT_TABLE_MAIN;
Denis V. Lunev8ad49422008-01-10 03:24:11 -080081 tb = fib_get_table(net, id);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070082 if (tb)
83 return tb;
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080084
85 tb = fib_hash_table(id);
Linus Torvalds1da177e2005-04-16 15:20:36 -070086 if (!tb)
87 return NULL;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070088 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080089 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -070090 return tb;
91}
92
Denis V. Lunev8ad49422008-01-10 03:24:11 -080093struct fib_table *fib_get_table(struct net *net, u32 id)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070094{
95 struct fib_table *tb;
96 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080097 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070098 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070099
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700100 if (id == 0)
101 id = RT_TABLE_MAIN;
102 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800103
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700104 rcu_read_lock();
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800105 head = &net->ipv4.fib_table_hash[h];
106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700107 if (tb->tb_id == id) {
108 rcu_read_unlock();
109 return tb;
110 }
111 }
112 rcu_read_unlock();
113 return NULL;
114}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116
Denis V. Lunev010278e2008-01-22 22:04:04 -0800117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
Denis V. Lunev010278e2008-01-22 22:04:04 -0800127 tb = fib_get_table(net, table);
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000129 fib_table_select_default(tb, flp, res);
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800130}
131
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800132static void fib_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133{
134 int flushed = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700136 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800137 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700138 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800141 head = &net->ipv4.fib_table_hash[h];
142 hlist_for_each_entry(tb, node, head, tb_hlist)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000143 flushed += fib_table_flush(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145
146 if (flushed)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700147 rt_cache_flush(net, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148}
149
150/*
151 * Find the first device with a given source address.
152 */
153
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800154struct net_device * ip_dev_find(struct net *net, __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155{
Tom Herbert4465b462010-05-23 19:54:12 +0000156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } },
157 .flags = FLOWI_FLAG_MATCH_ANY_IIF };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158 struct fib_result res;
159 struct net_device *dev = NULL;
160
161#ifdef CONFIG_IP_MULTIPLE_TABLES
162 res.r = NULL;
163#endif
164
Tom Herbert4465b462010-05-23 19:54:12 +0000165 if (fib_lookup(net, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166 return NULL;
167 if (res.type != RTN_LOCAL)
168 goto out;
169 dev = FIB_RES_DEV(res);
170
171 if (dev)
172 dev_hold(dev);
173out:
174 fib_res_put(&res);
175 return dev;
176}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000177EXPORT_SYMBOL(ip_dev_find);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800179/*
180 * Find address type as if only "dev" was present in the system. If
181 * on_dev is NULL then all interfaces are taken into consideration.
182 */
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800183static inline unsigned __inet_dev_addr_type(struct net *net,
184 const struct net_device *dev,
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800185 __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186{
187 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
188 struct fib_result res;
189 unsigned ret = RTN_BROADCAST;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700190 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191
Jan Engelhardt1e637c72008-01-21 03:18:08 -0800192 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700193 return RTN_BROADCAST;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800194 if (ipv4_is_multicast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700195 return RTN_MULTICAST;
196
197#ifdef CONFIG_IP_MULTIPLE_TABLES
198 res.r = NULL;
199#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900200
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800201 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700202 if (local_table) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203 ret = RTN_UNICAST;
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000204 if (!fib_table_lookup(local_table, &fl, &res)) {
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800205 if (!dev || dev == res.fi->fib_dev)
206 ret = res.type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207 fib_res_put(&res);
208 }
209 }
210 return ret;
211}
212
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800213unsigned int inet_addr_type(struct net *net, __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800214{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800215 return __inet_dev_addr_type(net, NULL, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800216}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000217EXPORT_SYMBOL(inet_addr_type);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800218
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800219unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
220 __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800221{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800222 return __inet_dev_addr_type(net, dev, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800223}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000224EXPORT_SYMBOL(inet_dev_addr_type);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800225
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226/* Given (packet source, input interface) and optional (dst, oif, tos):
227 - (main) check, that source is valid i.e. not broadcast or our local
228 address.
229 - figure out what "logical" interface this packet arrived
230 and calculate "specific destination" address.
231 - check, that packet arrived from expected physical interface.
232 */
233
Al Virod9c9df82006-09-26 21:28:14 -0700234int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
jamalb0c110c2009-10-18 02:12:33 +0000235 struct net_device *dev, __be32 *spec_dst,
236 u32 *itag, u32 mark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237{
238 struct in_device *in_dev;
239 struct flowi fl = { .nl_u = { .ip4_u =
240 { .daddr = src,
241 .saddr = dst,
242 .tos = tos } },
jamalb0c110c2009-10-18 02:12:33 +0000243 .mark = mark,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244 .iif = oif };
jamalb0c110c2009-10-18 02:12:33 +0000245
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 struct fib_result res;
Patrick McHardy8153a102009-12-03 01:25:58 +0000247 int no_addr, rpf, accept_local;
David S. Miller6f86b322010-09-06 22:36:19 -0700248 bool dev_match;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249 int ret;
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800250 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251
Patrick McHardy8153a102009-12-03 01:25:58 +0000252 no_addr = rpf = accept_local = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253 rcu_read_lock();
Herbert Xue5ed6392005-10-03 14:35:55 -0700254 in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 if (in_dev) {
256 no_addr = in_dev->ifa_list == NULL;
257 rpf = IN_DEV_RPFILTER(in_dev);
Patrick McHardy8153a102009-12-03 01:25:58 +0000258 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
Jamal Hadi Salim28f6aee2009-12-25 17:30:22 -0800259 if (mark && !IN_DEV_SRC_VMARK(in_dev))
260 fl.mark = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 }
262 rcu_read_unlock();
263
264 if (in_dev == NULL)
265 goto e_inval;
266
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900267 net = dev_net(dev);
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800268 if (fib_lookup(net, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269 goto last_resort;
Patrick McHardy8153a102009-12-03 01:25:58 +0000270 if (res.type != RTN_UNICAST) {
271 if (res.type != RTN_LOCAL || !accept_local)
272 goto e_inval_res;
273 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274 *spec_dst = FIB_RES_PREFSRC(res);
275 fib_combine_itag(itag, &res);
David S. Miller6f86b322010-09-06 22:36:19 -0700276 dev_match = false;
277
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Miller6f86b322010-09-06 22:36:19 -0700279 for (ret = 0; ret < res.fi->fib_nhs; ret++) {
280 struct fib_nh *nh = &res.fi->fib_nh[ret];
281
282 if (nh->nh_dev == dev) {
283 dev_match = true;
284 break;
285 }
286 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287#else
288 if (FIB_RES_DEV(res) == dev)
David S. Miller6f86b322010-09-06 22:36:19 -0700289 dev_match = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290#endif
David S. Miller6f86b322010-09-06 22:36:19 -0700291 if (dev_match) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
293 fib_res_put(&res);
294 return ret;
295 }
296 fib_res_put(&res);
297 if (no_addr)
298 goto last_resort;
Stephen Hemmingerc1cf8422009-02-20 08:25:36 +0000299 if (rpf == 1)
Eric Dumazetb5f7e752010-06-02 12:05:27 +0000300 goto e_rpf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700301 fl.oif = dev->ifindex;
302
303 ret = 0;
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800304 if (fib_lookup(net, &fl, &res) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305 if (res.type == RTN_UNICAST) {
306 *spec_dst = FIB_RES_PREFSRC(res);
307 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
308 }
309 fib_res_put(&res);
310 }
311 return ret;
312
313last_resort:
314 if (rpf)
Eric Dumazetb5f7e752010-06-02 12:05:27 +0000315 goto e_rpf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
317 *itag = 0;
318 return 0;
319
320e_inval_res:
321 fib_res_put(&res);
322e_inval:
323 return -EINVAL;
Eric Dumazetb5f7e752010-06-02 12:05:27 +0000324e_rpf:
325 return -EXDEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326}
327
Al Viro81f7bf62006-09-27 18:40:00 -0700328static inline __be32 sk_extract_addr(struct sockaddr *addr)
Thomas Graf4e902c52006-08-17 18:14:52 -0700329{
330 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
331}
332
333static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
334{
335 struct nlattr *nla;
336
337 nla = (struct nlattr *) ((char *) mx + len);
338 nla->nla_type = type;
339 nla->nla_len = nla_attr_size(4);
340 *(u32 *) nla_data(nla) = value;
341
342 return len + nla_total_size(4);
343}
344
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800345static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
Thomas Graf4e902c52006-08-17 18:14:52 -0700346 struct fib_config *cfg)
347{
Al Viro6d85c102006-09-26 22:15:46 -0700348 __be32 addr;
Thomas Graf4e902c52006-08-17 18:14:52 -0700349 int plen;
350
351 memset(cfg, 0, sizeof(*cfg));
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800352 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700353
354 if (rt->rt_dst.sa_family != AF_INET)
355 return -EAFNOSUPPORT;
356
357 /*
358 * Check mask for validity:
359 * a) it must be contiguous.
360 * b) destination must have all host bits clear.
361 * c) if application forgot to set correct family (AF_INET),
362 * reject request unless it is absolutely clear i.e.
363 * both family and mask are zero.
364 */
365 plen = 32;
366 addr = sk_extract_addr(&rt->rt_dst);
367 if (!(rt->rt_flags & RTF_HOST)) {
Al Viro81f7bf62006-09-27 18:40:00 -0700368 __be32 mask = sk_extract_addr(&rt->rt_genmask);
Thomas Graf4e902c52006-08-17 18:14:52 -0700369
370 if (rt->rt_genmask.sa_family != AF_INET) {
371 if (mask || rt->rt_genmask.sa_family)
372 return -EAFNOSUPPORT;
373 }
374
375 if (bad_mask(mask, addr))
376 return -EINVAL;
377
378 plen = inet_mask_len(mask);
379 }
380
381 cfg->fc_dst_len = plen;
382 cfg->fc_dst = addr;
383
384 if (cmd != SIOCDELRT) {
385 cfg->fc_nlflags = NLM_F_CREATE;
386 cfg->fc_protocol = RTPROT_BOOT;
387 }
388
389 if (rt->rt_metric)
390 cfg->fc_priority = rt->rt_metric - 1;
391
392 if (rt->rt_flags & RTF_REJECT) {
393 cfg->fc_scope = RT_SCOPE_HOST;
394 cfg->fc_type = RTN_UNREACHABLE;
395 return 0;
396 }
397
398 cfg->fc_scope = RT_SCOPE_NOWHERE;
399 cfg->fc_type = RTN_UNICAST;
400
401 if (rt->rt_dev) {
402 char *colon;
403 struct net_device *dev;
404 char devname[IFNAMSIZ];
405
406 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
407 return -EFAULT;
408
409 devname[IFNAMSIZ-1] = 0;
410 colon = strchr(devname, ':');
411 if (colon)
412 *colon = 0;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800413 dev = __dev_get_by_name(net, devname);
Thomas Graf4e902c52006-08-17 18:14:52 -0700414 if (!dev)
415 return -ENODEV;
416 cfg->fc_oif = dev->ifindex;
417 if (colon) {
418 struct in_ifaddr *ifa;
419 struct in_device *in_dev = __in_dev_get_rtnl(dev);
420 if (!in_dev)
421 return -ENODEV;
422 *colon = ':';
423 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
424 if (strcmp(ifa->ifa_label, devname) == 0)
425 break;
426 if (ifa == NULL)
427 return -ENODEV;
428 cfg->fc_prefsrc = ifa->ifa_local;
429 }
430 }
431
432 addr = sk_extract_addr(&rt->rt_gateway);
433 if (rt->rt_gateway.sa_family == AF_INET && addr) {
434 cfg->fc_gw = addr;
435 if (rt->rt_flags & RTF_GATEWAY &&
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800436 inet_addr_type(net, addr) == RTN_UNICAST)
Thomas Graf4e902c52006-08-17 18:14:52 -0700437 cfg->fc_scope = RT_SCOPE_UNIVERSE;
438 }
439
440 if (cmd == SIOCDELRT)
441 return 0;
442
443 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
444 return -EINVAL;
445
446 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
447 cfg->fc_scope = RT_SCOPE_LINK;
448
449 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
450 struct nlattr *mx;
451 int len = 0;
452
453 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900454 if (mx == NULL)
Thomas Graf4e902c52006-08-17 18:14:52 -0700455 return -ENOMEM;
456
457 if (rt->rt_flags & RTF_MTU)
458 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
459
460 if (rt->rt_flags & RTF_WINDOW)
461 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
462
463 if (rt->rt_flags & RTF_IRTT)
464 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
465
466 cfg->fc_mx = mx;
467 cfg->fc_mx_len = len;
468 }
469
470 return 0;
471}
472
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473/*
474 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
475 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900476
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800477int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478{
Thomas Graf4e902c52006-08-17 18:14:52 -0700479 struct fib_config cfg;
480 struct rtentry rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482
483 switch (cmd) {
484 case SIOCADDRT: /* Add a route */
485 case SIOCDELRT: /* Delete a route */
486 if (!capable(CAP_NET_ADMIN))
487 return -EPERM;
Thomas Graf4e902c52006-08-17 18:14:52 -0700488
489 if (copy_from_user(&rt, arg, sizeof(rt)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490 return -EFAULT;
Thomas Graf4e902c52006-08-17 18:14:52 -0700491
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492 rtnl_lock();
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800493 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494 if (err == 0) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700495 struct fib_table *tb;
496
Linus Torvalds1da177e2005-04-16 15:20:36 -0700497 if (cmd == SIOCDELRT) {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800498 tb = fib_get_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499 if (tb)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000500 err = fib_table_delete(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700501 else
502 err = -ESRCH;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503 } else {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800504 tb = fib_new_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505 if (tb)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000506 err = fib_table_insert(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700507 else
508 err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700510
511 /* allocated by rtentry_to_fib_config() */
512 kfree(cfg.fc_mx);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700513 }
514 rtnl_unlock();
515 return err;
516 }
517 return -EINVAL;
518}
519
Patrick McHardyef7c79e2007-06-05 12:38:30 -0700520const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
Thomas Graf4e902c52006-08-17 18:14:52 -0700521 [RTA_DST] = { .type = NLA_U32 },
522 [RTA_SRC] = { .type = NLA_U32 },
523 [RTA_IIF] = { .type = NLA_U32 },
524 [RTA_OIF] = { .type = NLA_U32 },
525 [RTA_GATEWAY] = { .type = NLA_U32 },
526 [RTA_PRIORITY] = { .type = NLA_U32 },
527 [RTA_PREFSRC] = { .type = NLA_U32 },
528 [RTA_METRICS] = { .type = NLA_NESTED },
Thomas Graf5176f912006-08-26 20:13:18 -0700529 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Thomas Graf4e902c52006-08-17 18:14:52 -0700530 [RTA_FLOW] = { .type = NLA_U32 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700531};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800533static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
534 struct nlmsghdr *nlh, struct fib_config *cfg)
Thomas Graf4e902c52006-08-17 18:14:52 -0700535{
536 struct nlattr *attr;
537 int err, remaining;
538 struct rtmsg *rtm;
539
540 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
541 if (err < 0)
542 goto errout;
543
544 memset(cfg, 0, sizeof(*cfg));
545
546 rtm = nlmsg_data(nlh);
Thomas Graf4e902c52006-08-17 18:14:52 -0700547 cfg->fc_dst_len = rtm->rtm_dst_len;
Thomas Graf4e902c52006-08-17 18:14:52 -0700548 cfg->fc_tos = rtm->rtm_tos;
549 cfg->fc_table = rtm->rtm_table;
550 cfg->fc_protocol = rtm->rtm_protocol;
551 cfg->fc_scope = rtm->rtm_scope;
552 cfg->fc_type = rtm->rtm_type;
553 cfg->fc_flags = rtm->rtm_flags;
554 cfg->fc_nlflags = nlh->nlmsg_flags;
555
556 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
557 cfg->fc_nlinfo.nlh = nlh;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800558 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700559
Thomas Grafa0ee18b2007-03-24 20:32:54 -0700560 if (cfg->fc_type > RTN_MAX) {
561 err = -EINVAL;
562 goto errout;
563 }
564
Thomas Graf4e902c52006-08-17 18:14:52 -0700565 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200566 switch (nla_type(attr)) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700567 case RTA_DST:
Al Viro17fb2c62006-09-26 22:15:25 -0700568 cfg->fc_dst = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700569 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700570 case RTA_OIF:
571 cfg->fc_oif = nla_get_u32(attr);
572 break;
573 case RTA_GATEWAY:
Al Viro17fb2c62006-09-26 22:15:25 -0700574 cfg->fc_gw = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700575 break;
576 case RTA_PRIORITY:
577 cfg->fc_priority = nla_get_u32(attr);
578 break;
579 case RTA_PREFSRC:
Al Viro17fb2c62006-09-26 22:15:25 -0700580 cfg->fc_prefsrc = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700581 break;
582 case RTA_METRICS:
583 cfg->fc_mx = nla_data(attr);
584 cfg->fc_mx_len = nla_len(attr);
585 break;
586 case RTA_MULTIPATH:
587 cfg->fc_mp = nla_data(attr);
588 cfg->fc_mp_len = nla_len(attr);
589 break;
590 case RTA_FLOW:
591 cfg->fc_flow = nla_get_u32(attr);
592 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700593 case RTA_TABLE:
594 cfg->fc_table = nla_get_u32(attr);
595 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596 }
597 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700598
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700600errout:
601 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602}
603
Jianjun Kong6ed25332008-11-03 00:25:16 -0800604static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900606 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700607 struct fib_config cfg;
608 struct fib_table *tb;
609 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700610
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800611 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700612 if (err < 0)
613 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800615 tb = fib_get_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700616 if (tb == NULL) {
617 err = -ESRCH;
618 goto errout;
619 }
620
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000621 err = fib_table_delete(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700622errout:
623 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624}
625
Jianjun Kong6ed25332008-11-03 00:25:16 -0800626static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900628 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700629 struct fib_config cfg;
630 struct fib_table *tb;
631 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800633 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700634 if (err < 0)
635 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636
Denis V. Lunev226b0b4a52008-01-10 03:30:24 -0800637 tb = fib_new_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700638 if (tb == NULL) {
639 err = -ENOBUFS;
640 goto errout;
641 }
642
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000643 err = fib_table_insert(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700644errout:
645 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646}
647
Thomas Graf63f34442007-03-22 11:55:17 -0700648static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900650 struct net *net = sock_net(skb->sk);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700651 unsigned int h, s_h;
652 unsigned int e = 0, s_e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700653 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700654 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800655 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700656 int dumped = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657
Thomas Grafbe403ea2006-08-17 18:15:17 -0700658 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
659 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660 return ip_rt_dump(skb, cb);
661
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700662 s_h = cb->args[0];
663 s_e = cb->args[1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700664
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700665 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
666 e = 0;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800667 head = &net->ipv4.fib_table_hash[h];
668 hlist_for_each_entry(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700669 if (e < s_e)
670 goto next;
671 if (dumped)
672 memset(&cb->args[2], 0, sizeof(cb->args) -
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900673 2 * sizeof(cb->args[0]));
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000674 if (fib_table_dump(tb, skb, cb) < 0)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700675 goto out;
676 dumped = 1;
677next:
678 e++;
679 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700681out:
682 cb->args[1] = e;
683 cb->args[0] = h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684
685 return skb->len;
686}
687
688/* Prepare and feed intra-kernel routing request.
689 Really, it should be netlink message, but :-( netlink
690 can be not configured, so that we feed it directly
691 to fib engine. It is legal, because all events occur
692 only when netlink is already locked.
693 */
694
Al Viro81f7bf62006-09-27 18:40:00 -0700695static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900697 struct net *net = dev_net(ifa->ifa_dev->dev);
Thomas Graf4e902c52006-08-17 18:14:52 -0700698 struct fib_table *tb;
699 struct fib_config cfg = {
700 .fc_protocol = RTPROT_KERNEL,
701 .fc_type = type,
702 .fc_dst = dst,
703 .fc_dst_len = dst_len,
704 .fc_prefsrc = ifa->ifa_local,
705 .fc_oif = ifa->ifa_dev->dev->ifindex,
706 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800707 .fc_nlinfo = {
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800708 .nl_net = net,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800709 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700710 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700711
712 if (type == RTN_UNICAST)
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800713 tb = fib_new_table(net, RT_TABLE_MAIN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714 else
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800715 tb = fib_new_table(net, RT_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716
717 if (tb == NULL)
718 return;
719
Thomas Graf4e902c52006-08-17 18:14:52 -0700720 cfg.fc_table = tb->tb_id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721
Thomas Graf4e902c52006-08-17 18:14:52 -0700722 if (type != RTN_LOCAL)
723 cfg.fc_scope = RT_SCOPE_LINK;
724 else
725 cfg.fc_scope = RT_SCOPE_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726
727 if (cmd == RTM_NEWROUTE)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000728 fib_table_insert(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729 else
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000730 fib_table_delete(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731}
732
Jamal Hadi Salim0ff60a42005-11-22 14:47:37 -0800733void fib_add_ifaddr(struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734{
735 struct in_device *in_dev = ifa->ifa_dev;
736 struct net_device *dev = in_dev->dev;
737 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700738 __be32 mask = ifa->ifa_mask;
739 __be32 addr = ifa->ifa_local;
740 __be32 prefix = ifa->ifa_address&mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700741
742 if (ifa->ifa_flags&IFA_F_SECONDARY) {
743 prim = inet_ifa_byprefix(in_dev, prefix, mask);
744 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800745 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746 return;
747 }
748 }
749
750 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
751
752 if (!(dev->flags&IFF_UP))
753 return;
754
755 /* Add broadcast address, if it is explicitly assigned. */
Al Viroa144ea42006-09-28 18:00:55 -0700756 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
758
Joe Perchesf97c1e02007-12-16 13:45:43 -0800759 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 (prefix != addr || ifa->ifa_prefixlen < 32)) {
761 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
762 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
763
764 /* Add network specific broadcasts, when it takes a sense */
765 if (ifa->ifa_prefixlen < 31) {
766 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
767 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
768 }
769 }
770}
771
772static void fib_del_ifaddr(struct in_ifaddr *ifa)
773{
774 struct in_device *in_dev = ifa->ifa_dev;
775 struct net_device *dev = in_dev->dev;
776 struct in_ifaddr *ifa1;
777 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700778 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
779 __be32 any = ifa->ifa_address&ifa->ifa_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780#define LOCAL_OK 1
781#define BRD_OK 2
782#define BRD0_OK 4
783#define BRD1_OK 8
784 unsigned ok = 0;
785
786 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
787 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
788 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
789 else {
790 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
791 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800792 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793 return;
794 }
795 }
796
797 /* Deletion is more complicated than add.
798 We should take care of not to delete too much :-)
799
800 Scan address list to be sure that addresses are really gone.
801 */
802
803 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
804 if (ifa->ifa_local == ifa1->ifa_local)
805 ok |= LOCAL_OK;
806 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
807 ok |= BRD_OK;
808 if (brd == ifa1->ifa_broadcast)
809 ok |= BRD1_OK;
810 if (any == ifa1->ifa_broadcast)
811 ok |= BRD0_OK;
812 }
813
814 if (!(ok&BRD_OK))
815 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
816 if (!(ok&BRD1_OK))
817 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
818 if (!(ok&BRD0_OK))
819 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
820 if (!(ok&LOCAL_OK)) {
821 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
822
823 /* Check, that this local address finally disappeared. */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900824 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825 /* And the last, but not the least thing.
826 We must flush stray FIB entries.
827
828 First of all, we scan fib_info list searching
829 for stray nexthop entries, then ignite fib_flush.
830 */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900831 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
832 fib_flush(dev_net(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833 }
834 }
835#undef LOCAL_OK
836#undef BRD_OK
837#undef BRD0_OK
838#undef BRD1_OK
839}
840
Robert Olsson246955f2005-06-20 13:36:39 -0700841static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
842{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900843
Robert Olsson246955f2005-06-20 13:36:39 -0700844 struct fib_result res;
Thomas Graf5f3008932006-11-09 15:21:41 -0800845 struct flowi fl = { .mark = frn->fl_mark,
Thomas Graf47dcf0c2006-11-09 15:20:38 -0800846 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
Robert Olsson246955f2005-06-20 13:36:39 -0700847 .tos = frn->fl_tos,
848 .scope = frn->fl_scope } } };
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700849
Sergey Vlasov912a41a2007-04-27 02:17:19 -0700850#ifdef CONFIG_IP_MULTIPLE_TABLES
851 res.r = NULL;
852#endif
853
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700854 frn->err = -ENOENT;
Robert Olsson246955f2005-06-20 13:36:39 -0700855 if (tb) {
856 local_bh_disable();
857
858 frn->tb_id = tb->tb_id;
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000859 frn->err = fib_table_lookup(tb, &fl, &res);
Robert Olsson246955f2005-06-20 13:36:39 -0700860
861 if (!frn->err) {
862 frn->prefixlen = res.prefixlen;
863 frn->nh_sel = res.nh_sel;
864 frn->type = res.type;
865 frn->scope = res.scope;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700866 fib_res_put(&res);
Robert Olsson246955f2005-06-20 13:36:39 -0700867 }
868 local_bh_enable();
869 }
870}
871
David S. Miller28f7b0362007-10-10 21:32:39 -0700872static void nl_fib_input(struct sk_buff *skb)
Robert Olsson246955f2005-06-20 13:36:39 -0700873{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800874 struct net *net;
Robert Olsson246955f2005-06-20 13:36:39 -0700875 struct fib_result_nl *frn;
David S. Miller28f7b0362007-10-10 21:32:39 -0700876 struct nlmsghdr *nlh;
Robert Olsson246955f2005-06-20 13:36:39 -0700877 struct fib_table *tb;
David S. Miller28f7b0362007-10-10 21:32:39 -0700878 u32 pid;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700879
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900880 net = sock_net(skb->sk);
Arnaldo Carvalho de Melob529ccf2007-04-25 19:08:35 -0700881 nlh = nlmsg_hdr(skb);
Thomas Grafea865752005-12-01 14:30:00 -0800882 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
Denis V. Lunevd883a032007-12-21 02:01:53 -0800883 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
Thomas Grafea865752005-12-01 14:30:00 -0800884 return;
Denis V. Lunevd883a032007-12-21 02:01:53 -0800885
886 skb = skb_clone(skb, GFP_KERNEL);
887 if (skb == NULL)
888 return;
889 nlh = nlmsg_hdr(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900890
Robert Olsson246955f2005-06-20 13:36:39 -0700891 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800892 tb = fib_get_table(net, frn->tb_id_in);
Robert Olsson246955f2005-06-20 13:36:39 -0700893
894 nl_fib_lookup(frn, tb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900895
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700896 pid = NETLINK_CB(skb).pid; /* pid of sending process */
Robert Olsson246955f2005-06-20 13:36:39 -0700897 NETLINK_CB(skb).pid = 0; /* from kernel */
Patrick McHardyac6d4392005-08-14 19:29:52 -0700898 NETLINK_CB(skb).dst_group = 0; /* unicast */
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800899 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900900}
Robert Olsson246955f2005-06-20 13:36:39 -0700901
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +0000902static int __net_init nl_fib_lookup_init(struct net *net)
Robert Olsson246955f2005-06-20 13:36:39 -0700903{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800904 struct sock *sk;
905 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
906 nl_fib_input, NULL, THIS_MODULE);
907 if (sk == NULL)
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800908 return -EAFNOSUPPORT;
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800909 net->ipv4.fibnl = sk;
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800910 return 0;
911}
912
913static void nl_fib_lookup_exit(struct net *net)
914{
Denis V. Lunevb7c6ba62008-01-28 14:41:19 -0800915 netlink_kernel_release(net->ipv4.fibnl);
Denis V. Lunev775516b2008-01-18 23:55:19 -0800916 net->ipv4.fibnl = NULL;
Robert Olsson246955f2005-06-20 13:36:39 -0700917}
918
/*
 * Tear down IPv4 routing state that refers to @dev.
 *
 * @force is handed to fib_sync_down_dev(); when that call reports a
 * non-zero result the namespace's FIB is flushed.  The route cache is
 * then flushed with @delay and ARP is notified that the device went
 * down.
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	int need_flush = fib_sync_down_dev(dev, force);

	if (need_flush)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), delay);
	arp_ifdown(dev);
}
926
927static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
928{
Jianjun Kong6ed25332008-11-03 00:25:16 -0800929 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700930 struct net_device *dev = ifa->ifa_dev->dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931
932 switch (event) {
933 case NETDEV_UP:
934 fib_add_ifaddr(ifa);
935#ifdef CONFIG_IP_ROUTE_MULTIPATH
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700936 fib_sync_up(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700938 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939 break;
940 case NETDEV_DOWN:
941 fib_del_ifaddr(ifa);
Jayachandran C9fcc2e82005-10-27 15:10:01 -0700942 if (ifa->ifa_dev->ifa_list == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943 /* Last address was deleted from this interface.
944 Disable IP.
945 */
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000946 fib_disable_ip(dev, 1, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947 } else {
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700948 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949 }
950 break;
951 }
952 return NOTIFY_DONE;
953}
954
955static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
956{
957 struct net_device *dev = ptr;
Herbert Xue5ed6392005-10-03 14:35:55 -0700958 struct in_device *in_dev = __in_dev_get_rtnl(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959
960 if (event == NETDEV_UNREGISTER) {
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000961 fib_disable_ip(dev, 2, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962 return NOTIFY_DONE;
963 }
964
965 if (!in_dev)
966 return NOTIFY_DONE;
967
968 switch (event) {
969 case NETDEV_UP:
970 for_ifa(in_dev) {
971 fib_add_ifaddr(ifa);
972 } endfor_ifa(in_dev);
973#ifdef CONFIG_IP_ROUTE_MULTIPATH
974 fib_sync_up(dev);
975#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700976 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977 break;
978 case NETDEV_DOWN:
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000979 fib_disable_ip(dev, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980 break;
981 case NETDEV_CHANGEMTU:
982 case NETDEV_CHANGE:
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700983 rt_cache_flush(dev_net(dev), 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984 break;
Eric W. Biedermana5ee1552009-11-29 15:45:58 +0000985 case NETDEV_UNREGISTER_BATCH:
986 rt_cache_flush_batch();
987 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700988 }
989 return NOTIFY_DONE;
990}
991
992static struct notifier_block fib_inetaddr_notifier = {
Jianjun Kong6ed25332008-11-03 00:25:16 -0800993 .notifier_call = fib_inetaddr_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994};
995
996static struct notifier_block fib_netdev_notifier = {
Jianjun Kong6ed25332008-11-03 00:25:16 -0800997 .notifier_call = fib_netdev_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998};
999
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001000static int __net_init ip_fib_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001001{
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -08001002 int err;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001003 unsigned int i;
1004
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001005 net->ipv4.fib_table_hash = kzalloc(
1006 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
1007 if (net->ipv4.fib_table_hash == NULL)
1008 return -ENOMEM;
1009
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001010 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001011 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -08001012
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -08001013 err = fib4_rules_init(net);
1014 if (err < 0)
1015 goto fail;
1016 return 0;
1017
1018fail:
1019 kfree(net->ipv4.fib_table_hash);
1020 return err;
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001021}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001023static void ip_fib_net_exit(struct net *net)
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001024{
1025 unsigned int i;
Thomas Graf63f34442007-03-22 11:55:17 -07001026
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001027#ifdef CONFIG_IP_MULTIPLE_TABLES
1028 fib4_rules_exit(net);
1029#endif
1030
1031 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1032 struct fib_table *tb;
1033 struct hlist_head *head;
1034 struct hlist_node *node, *tmp;
1035
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001036 head = &net->ipv4.fib_table_hash[i];
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001037 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1038 hlist_del(node);
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001039 fib_table_flush(tb);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001040 kfree(tb);
1041 }
1042 }
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001043 kfree(net->ipv4.fib_table_hash);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001044}
1045
1046static int __net_init fib_net_init(struct net *net)
1047{
1048 int error;
1049
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001050 error = ip_fib_net_init(net);
1051 if (error < 0)
1052 goto out;
1053 error = nl_fib_lookup_init(net);
1054 if (error < 0)
1055 goto out_nlfl;
1056 error = fib_proc_init(net);
1057 if (error < 0)
1058 goto out_proc;
1059out:
1060 return error;
1061
1062out_proc:
1063 nl_fib_lookup_exit(net);
1064out_nlfl:
1065 ip_fib_net_exit(net);
1066 goto out;
1067}
1068
1069static void __net_exit fib_net_exit(struct net *net)
1070{
1071 fib_proc_exit(net);
1072 nl_fib_lookup_exit(net);
1073 ip_fib_net_exit(net);
1074}
1075
1076static struct pernet_operations fib_net_ops = {
1077 .init = fib_net_init,
1078 .exit = fib_net_exit,
1079};
1080
1081void __init ip_fib_init(void)
1082{
Thomas Graf63f34442007-03-22 11:55:17 -07001083 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1084 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1085 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001086
1087 register_pernet_subsys(&fib_net_ops);
1088 register_netdevice_notifier(&fib_netdev_notifier);
1089 register_inetaddr_notifier(&fib_inetaddr_notifier);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001090
1091 fib_hash_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092}