blob: 7fe54a3c168bd385ee81c7487001baed63aad184 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080022#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/types.h>
24#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070025#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020032#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/netdevice.h>
Thomas Graf18237302006-08-04 23:04:54 -070034#include <linux/if_addr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <linux/if_arp.h>
36#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070037#include <linux/init.h>
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070038#include <linux/list.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
Thomas Graf63f34442007-03-22 11:55:17 -070048#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070049
50#define FFprint(a...) printk(KERN_DEBUG a)
51
David S. Miller28f7b0362007-10-10 21:32:39 -070052static struct sock *fibnl;
53
Linus Torvalds1da177e2005-04-16 15:20:36 -070054#ifndef CONFIG_IP_MULTIPLE_TABLES
55
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -080056static int __net_init fib4_rules_init(struct net *net)
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080057{
Denis V. Lunev93456b62008-01-10 03:23:38 -080058 struct fib_table *local_table, *main_table;
59
60 local_table = fib_hash_init(RT_TABLE_LOCAL);
61 if (local_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080062 return -ENOMEM;
63
Denis V. Lunev93456b62008-01-10 03:23:38 -080064 main_table = fib_hash_init(RT_TABLE_MAIN);
65 if (main_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080066 goto fail;
67
Denis V. Lunev93456b62008-01-10 03:23:38 -080068 hlist_add_head_rcu(&local_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080069 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
Denis V. Lunev93456b62008-01-10 03:23:38 -080070 hlist_add_head_rcu(&main_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080071 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080072 return 0;
73
74fail:
Denis V. Lunev93456b62008-01-10 03:23:38 -080075 kfree(local_table);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080076 return -ENOMEM;
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080077}
Linus Torvalds1da177e2005-04-16 15:20:36 -070078#else
79
Denis V. Lunev8ad49422008-01-10 03:24:11 -080080struct fib_table *fib_new_table(struct net *net, u32 id)
Linus Torvalds1da177e2005-04-16 15:20:36 -070081{
82 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070083 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070085 if (id == 0)
86 id = RT_TABLE_MAIN;
Denis V. Lunev8ad49422008-01-10 03:24:11 -080087 tb = fib_get_table(net, id);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070088 if (tb)
89 return tb;
Linus Torvalds1da177e2005-04-16 15:20:36 -070090 tb = fib_hash_init(id);
91 if (!tb)
92 return NULL;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070093 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080094 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -070095 return tb;
96}
97
Denis V. Lunev8ad49422008-01-10 03:24:11 -080098struct fib_table *fib_get_table(struct net *net, u32 id)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070099{
100 struct fib_table *tb;
101 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800102 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700103 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700105 if (id == 0)
106 id = RT_TABLE_MAIN;
107 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800108
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700109 rcu_read_lock();
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800110 head = &net->ipv4.fib_table_hash[h];
111 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700112 if (tb->tb_id == id) {
113 rcu_read_unlock();
114 return tb;
115 }
116 }
117 rcu_read_unlock();
118 return NULL;
119}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120#endif /* CONFIG_IP_MULTIPLE_TABLES */
121
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800122static void fib_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123{
124 int flushed = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700126 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800127 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700128 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700130 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800131 head = &net->ipv4.fib_table_hash[h];
132 hlist_for_each_entry(tb, node, head, tb_hlist)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700133 flushed += tb->tb_flush(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135
136 if (flushed)
137 rt_cache_flush(-1);
138}
139
140/*
141 * Find the first device with a given source address.
142 */
143
Al Viro60cad5d2006-09-26 22:17:09 -0700144struct net_device * ip_dev_find(__be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145{
146 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
147 struct fib_result res;
148 struct net_device *dev = NULL;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700149 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150
151#ifdef CONFIG_IP_MULTIPLE_TABLES
152 res.r = NULL;
153#endif
154
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800155 local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700156 if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157 return NULL;
158 if (res.type != RTN_LOCAL)
159 goto out;
160 dev = FIB_RES_DEV(res);
161
162 if (dev)
163 dev_hold(dev);
164out:
165 fib_res_put(&res);
166 return dev;
167}
168
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800169/*
170 * Find address type as if only "dev" was present in the system. If
171 * on_dev is NULL then all interfaces are taken into consideration.
172 */
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800173static inline unsigned __inet_dev_addr_type(struct net *net,
174 const struct net_device *dev,
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800175 __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176{
177 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
178 struct fib_result res;
179 unsigned ret = RTN_BROADCAST;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700180 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181
Joe Perchesf97c1e02007-12-16 13:45:43 -0800182 if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183 return RTN_BROADCAST;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800184 if (ipv4_is_multicast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700185 return RTN_MULTICAST;
186
187#ifdef CONFIG_IP_MULTIPLE_TABLES
188 res.r = NULL;
189#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900190
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800191 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700192 if (local_table) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700193 ret = RTN_UNICAST;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700194 if (!local_table->tb_lookup(local_table, &fl, &res)) {
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800195 if (!dev || dev == res.fi->fib_dev)
196 ret = res.type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197 fib_res_put(&res);
198 }
199 }
200 return ret;
201}
202
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800203unsigned int inet_addr_type(struct net *net, __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800204{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800205 return __inet_dev_addr_type(net, NULL, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800206}
207
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800208unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
209 __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800210{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800211 return __inet_dev_addr_type(net, dev, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800212}
213
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214/* Given (packet source, input interface) and optional (dst, oif, tos):
215 - (main) check, that source is valid i.e. not broadcast or our local
216 address.
217 - figure out what "logical" interface this packet arrived
218 and calculate "specific destination" address.
219 - check, that packet arrived from expected physical interface.
220 */
221
Al Virod9c9df82006-09-26 21:28:14 -0700222int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
223 struct net_device *dev, __be32 *spec_dst, u32 *itag)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224{
225 struct in_device *in_dev;
226 struct flowi fl = { .nl_u = { .ip4_u =
227 { .daddr = src,
228 .saddr = dst,
229 .tos = tos } },
230 .iif = oif };
231 struct fib_result res;
232 int no_addr, rpf;
233 int ret;
234
235 no_addr = rpf = 0;
236 rcu_read_lock();
Herbert Xue5ed6392005-10-03 14:35:55 -0700237 in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238 if (in_dev) {
239 no_addr = in_dev->ifa_list == NULL;
240 rpf = IN_DEV_RPFILTER(in_dev);
241 }
242 rcu_read_unlock();
243
244 if (in_dev == NULL)
245 goto e_inval;
246
247 if (fib_lookup(&fl, &res))
248 goto last_resort;
249 if (res.type != RTN_UNICAST)
250 goto e_inval_res;
251 *spec_dst = FIB_RES_PREFSRC(res);
252 fib_combine_itag(itag, &res);
253#ifdef CONFIG_IP_ROUTE_MULTIPATH
254 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
255#else
256 if (FIB_RES_DEV(res) == dev)
257#endif
258 {
259 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
260 fib_res_put(&res);
261 return ret;
262 }
263 fib_res_put(&res);
264 if (no_addr)
265 goto last_resort;
266 if (rpf)
267 goto e_inval;
268 fl.oif = dev->ifindex;
269
270 ret = 0;
271 if (fib_lookup(&fl, &res) == 0) {
272 if (res.type == RTN_UNICAST) {
273 *spec_dst = FIB_RES_PREFSRC(res);
274 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
275 }
276 fib_res_put(&res);
277 }
278 return ret;
279
280last_resort:
281 if (rpf)
282 goto e_inval;
283 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
284 *itag = 0;
285 return 0;
286
287e_inval_res:
288 fib_res_put(&res);
289e_inval:
290 return -EINVAL;
291}
292
Al Viro81f7bf62006-09-27 18:40:00 -0700293static inline __be32 sk_extract_addr(struct sockaddr *addr)
Thomas Graf4e902c52006-08-17 18:14:52 -0700294{
295 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
296}
297
298static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
299{
300 struct nlattr *nla;
301
302 nla = (struct nlattr *) ((char *) mx + len);
303 nla->nla_type = type;
304 nla->nla_len = nla_attr_size(4);
305 *(u32 *) nla_data(nla) = value;
306
307 return len + nla_total_size(4);
308}
309
310static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
311 struct fib_config *cfg)
312{
Al Viro6d85c102006-09-26 22:15:46 -0700313 __be32 addr;
Thomas Graf4e902c52006-08-17 18:14:52 -0700314 int plen;
315
316 memset(cfg, 0, sizeof(*cfg));
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800317 cfg->fc_nlinfo.nl_net = &init_net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700318
319 if (rt->rt_dst.sa_family != AF_INET)
320 return -EAFNOSUPPORT;
321
322 /*
323 * Check mask for validity:
324 * a) it must be contiguous.
325 * b) destination must have all host bits clear.
326 * c) if application forgot to set correct family (AF_INET),
327 * reject request unless it is absolutely clear i.e.
328 * both family and mask are zero.
329 */
330 plen = 32;
331 addr = sk_extract_addr(&rt->rt_dst);
332 if (!(rt->rt_flags & RTF_HOST)) {
Al Viro81f7bf62006-09-27 18:40:00 -0700333 __be32 mask = sk_extract_addr(&rt->rt_genmask);
Thomas Graf4e902c52006-08-17 18:14:52 -0700334
335 if (rt->rt_genmask.sa_family != AF_INET) {
336 if (mask || rt->rt_genmask.sa_family)
337 return -EAFNOSUPPORT;
338 }
339
340 if (bad_mask(mask, addr))
341 return -EINVAL;
342
343 plen = inet_mask_len(mask);
344 }
345
346 cfg->fc_dst_len = plen;
347 cfg->fc_dst = addr;
348
349 if (cmd != SIOCDELRT) {
350 cfg->fc_nlflags = NLM_F_CREATE;
351 cfg->fc_protocol = RTPROT_BOOT;
352 }
353
354 if (rt->rt_metric)
355 cfg->fc_priority = rt->rt_metric - 1;
356
357 if (rt->rt_flags & RTF_REJECT) {
358 cfg->fc_scope = RT_SCOPE_HOST;
359 cfg->fc_type = RTN_UNREACHABLE;
360 return 0;
361 }
362
363 cfg->fc_scope = RT_SCOPE_NOWHERE;
364 cfg->fc_type = RTN_UNICAST;
365
366 if (rt->rt_dev) {
367 char *colon;
368 struct net_device *dev;
369 char devname[IFNAMSIZ];
370
371 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
372 return -EFAULT;
373
374 devname[IFNAMSIZ-1] = 0;
375 colon = strchr(devname, ':');
376 if (colon)
377 *colon = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -0700378 dev = __dev_get_by_name(&init_net, devname);
Thomas Graf4e902c52006-08-17 18:14:52 -0700379 if (!dev)
380 return -ENODEV;
381 cfg->fc_oif = dev->ifindex;
382 if (colon) {
383 struct in_ifaddr *ifa;
384 struct in_device *in_dev = __in_dev_get_rtnl(dev);
385 if (!in_dev)
386 return -ENODEV;
387 *colon = ':';
388 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
389 if (strcmp(ifa->ifa_label, devname) == 0)
390 break;
391 if (ifa == NULL)
392 return -ENODEV;
393 cfg->fc_prefsrc = ifa->ifa_local;
394 }
395 }
396
397 addr = sk_extract_addr(&rt->rt_gateway);
398 if (rt->rt_gateway.sa_family == AF_INET && addr) {
399 cfg->fc_gw = addr;
400 if (rt->rt_flags & RTF_GATEWAY &&
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800401 inet_addr_type(&init_net, addr) == RTN_UNICAST)
Thomas Graf4e902c52006-08-17 18:14:52 -0700402 cfg->fc_scope = RT_SCOPE_UNIVERSE;
403 }
404
405 if (cmd == SIOCDELRT)
406 return 0;
407
408 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
409 return -EINVAL;
410
411 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
412 cfg->fc_scope = RT_SCOPE_LINK;
413
414 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
415 struct nlattr *mx;
416 int len = 0;
417
418 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900419 if (mx == NULL)
Thomas Graf4e902c52006-08-17 18:14:52 -0700420 return -ENOMEM;
421
422 if (rt->rt_flags & RTF_MTU)
423 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
424
425 if (rt->rt_flags & RTF_WINDOW)
426 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
427
428 if (rt->rt_flags & RTF_IRTT)
429 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
430
431 cfg->fc_mx = mx;
432 cfg->fc_mx_len = len;
433 }
434
435 return 0;
436}
437
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438/*
439 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
440 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900441
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442int ip_rt_ioctl(unsigned int cmd, void __user *arg)
443{
Thomas Graf4e902c52006-08-17 18:14:52 -0700444 struct fib_config cfg;
445 struct rtentry rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447
448 switch (cmd) {
449 case SIOCADDRT: /* Add a route */
450 case SIOCDELRT: /* Delete a route */
451 if (!capable(CAP_NET_ADMIN))
452 return -EPERM;
Thomas Graf4e902c52006-08-17 18:14:52 -0700453
454 if (copy_from_user(&rt, arg, sizeof(rt)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455 return -EFAULT;
Thomas Graf4e902c52006-08-17 18:14:52 -0700456
Linus Torvalds1da177e2005-04-16 15:20:36 -0700457 rtnl_lock();
Thomas Graf4e902c52006-08-17 18:14:52 -0700458 err = rtentry_to_fib_config(cmd, &rt, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459 if (err == 0) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700460 struct fib_table *tb;
461
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462 if (cmd == SIOCDELRT) {
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800463 tb = fib_get_table(&init_net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700464 if (tb)
Thomas Graf4e902c52006-08-17 18:14:52 -0700465 err = tb->tb_delete(tb, &cfg);
466 else
467 err = -ESRCH;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468 } else {
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800469 tb = fib_new_table(&init_net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470 if (tb)
Thomas Graf4e902c52006-08-17 18:14:52 -0700471 err = tb->tb_insert(tb, &cfg);
472 else
473 err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700474 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700475
476 /* allocated by rtentry_to_fib_config() */
477 kfree(cfg.fc_mx);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478 }
479 rtnl_unlock();
480 return err;
481 }
482 return -EINVAL;
483}
484
Patrick McHardyef7c79e2007-06-05 12:38:30 -0700485const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
Thomas Graf4e902c52006-08-17 18:14:52 -0700486 [RTA_DST] = { .type = NLA_U32 },
487 [RTA_SRC] = { .type = NLA_U32 },
488 [RTA_IIF] = { .type = NLA_U32 },
489 [RTA_OIF] = { .type = NLA_U32 },
490 [RTA_GATEWAY] = { .type = NLA_U32 },
491 [RTA_PRIORITY] = { .type = NLA_U32 },
492 [RTA_PREFSRC] = { .type = NLA_U32 },
493 [RTA_METRICS] = { .type = NLA_NESTED },
Thomas Graf5176f912006-08-26 20:13:18 -0700494 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Thomas Graf4e902c52006-08-17 18:14:52 -0700495 [RTA_PROTOINFO] = { .type = NLA_U32 },
496 [RTA_FLOW] = { .type = NLA_U32 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700497};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498
Thomas Graf4e902c52006-08-17 18:14:52 -0700499static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
500 struct fib_config *cfg)
501{
502 struct nlattr *attr;
503 int err, remaining;
504 struct rtmsg *rtm;
505
506 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
507 if (err < 0)
508 goto errout;
509
510 memset(cfg, 0, sizeof(*cfg));
511
512 rtm = nlmsg_data(nlh);
Thomas Graf4e902c52006-08-17 18:14:52 -0700513 cfg->fc_dst_len = rtm->rtm_dst_len;
Thomas Graf4e902c52006-08-17 18:14:52 -0700514 cfg->fc_tos = rtm->rtm_tos;
515 cfg->fc_table = rtm->rtm_table;
516 cfg->fc_protocol = rtm->rtm_protocol;
517 cfg->fc_scope = rtm->rtm_scope;
518 cfg->fc_type = rtm->rtm_type;
519 cfg->fc_flags = rtm->rtm_flags;
520 cfg->fc_nlflags = nlh->nlmsg_flags;
521
522 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
523 cfg->fc_nlinfo.nlh = nlh;
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800524 cfg->fc_nlinfo.nl_net = &init_net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700525
Thomas Grafa0ee18b2007-03-24 20:32:54 -0700526 if (cfg->fc_type > RTN_MAX) {
527 err = -EINVAL;
528 goto errout;
529 }
530
Thomas Graf4e902c52006-08-17 18:14:52 -0700531 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200532 switch (nla_type(attr)) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700533 case RTA_DST:
Al Viro17fb2c62006-09-26 22:15:25 -0700534 cfg->fc_dst = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700535 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700536 case RTA_OIF:
537 cfg->fc_oif = nla_get_u32(attr);
538 break;
539 case RTA_GATEWAY:
Al Viro17fb2c62006-09-26 22:15:25 -0700540 cfg->fc_gw = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700541 break;
542 case RTA_PRIORITY:
543 cfg->fc_priority = nla_get_u32(attr);
544 break;
545 case RTA_PREFSRC:
Al Viro17fb2c62006-09-26 22:15:25 -0700546 cfg->fc_prefsrc = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700547 break;
548 case RTA_METRICS:
549 cfg->fc_mx = nla_data(attr);
550 cfg->fc_mx_len = nla_len(attr);
551 break;
552 case RTA_MULTIPATH:
553 cfg->fc_mp = nla_data(attr);
554 cfg->fc_mp_len = nla_len(attr);
555 break;
556 case RTA_FLOW:
557 cfg->fc_flow = nla_get_u32(attr);
558 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700559 case RTA_TABLE:
560 cfg->fc_table = nla_get_u32(attr);
561 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562 }
563 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700564
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700566errout:
567 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700568}
569
Thomas Graf63f34442007-03-22 11:55:17 -0700570static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571{
Denis V. Lunevb8542722007-12-01 00:21:31 +1100572 struct net *net = skb->sk->sk_net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700573 struct fib_config cfg;
574 struct fib_table *tb;
575 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576
Denis V. Lunevb8542722007-12-01 00:21:31 +1100577 if (net != &init_net)
578 return -EINVAL;
579
Thomas Graf4e902c52006-08-17 18:14:52 -0700580 err = rtm_to_fib_config(skb, nlh, &cfg);
581 if (err < 0)
582 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800584 tb = fib_get_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700585 if (tb == NULL) {
586 err = -ESRCH;
587 goto errout;
588 }
589
590 err = tb->tb_delete(tb, &cfg);
591errout:
592 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593}
594
Thomas Graf63f34442007-03-22 11:55:17 -0700595static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596{
Denis V. Lunevb8542722007-12-01 00:21:31 +1100597 struct net *net = skb->sk->sk_net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700598 struct fib_config cfg;
599 struct fib_table *tb;
600 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601
Denis V. Lunevb8542722007-12-01 00:21:31 +1100602 if (net != &init_net)
603 return -EINVAL;
604
Thomas Graf4e902c52006-08-17 18:14:52 -0700605 err = rtm_to_fib_config(skb, nlh, &cfg);
606 if (err < 0)
607 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800609 tb = fib_new_table(&init_net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700610 if (tb == NULL) {
611 err = -ENOBUFS;
612 goto errout;
613 }
614
615 err = tb->tb_insert(tb, &cfg);
616errout:
617 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618}
619
Thomas Graf63f34442007-03-22 11:55:17 -0700620static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621{
Denis V. Lunevb8542722007-12-01 00:21:31 +1100622 struct net *net = skb->sk->sk_net;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700623 unsigned int h, s_h;
624 unsigned int e = 0, s_e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700625 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700626 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800627 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700628 int dumped = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629
Denis V. Lunevb8542722007-12-01 00:21:31 +1100630 if (net != &init_net)
631 return 0;
632
Thomas Grafbe403ea2006-08-17 18:15:17 -0700633 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
634 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635 return ip_rt_dump(skb, cb);
636
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700637 s_h = cb->args[0];
638 s_e = cb->args[1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700640 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
641 e = 0;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800642 head = &net->ipv4.fib_table_hash[h];
643 hlist_for_each_entry(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700644 if (e < s_e)
645 goto next;
646 if (dumped)
647 memset(&cb->args[2], 0, sizeof(cb->args) -
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900648 2 * sizeof(cb->args[0]));
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700649 if (tb->tb_dump(tb, skb, cb) < 0)
650 goto out;
651 dumped = 1;
652next:
653 e++;
654 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700656out:
657 cb->args[1] = e;
658 cb->args[0] = h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659
660 return skb->len;
661}
662
663/* Prepare and feed intra-kernel routing request.
664 Really, it should be netlink message, but :-( netlink
665 can be not configured, so that we feed it directly
666 to fib engine. It is legal, because all events occur
667 only when netlink is already locked.
668 */
669
Al Viro81f7bf62006-09-27 18:40:00 -0700670static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671{
Thomas Graf4e902c52006-08-17 18:14:52 -0700672 struct fib_table *tb;
673 struct fib_config cfg = {
674 .fc_protocol = RTPROT_KERNEL,
675 .fc_type = type,
676 .fc_dst = dst,
677 .fc_dst_len = dst_len,
678 .fc_prefsrc = ifa->ifa_local,
679 .fc_oif = ifa->ifa_dev->dev->ifindex,
680 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800681 .fc_nlinfo = {
682 .nl_net = &init_net,
683 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700684 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685
686 if (type == RTN_UNICAST)
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800687 tb = fib_new_table(&init_net, RT_TABLE_MAIN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688 else
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800689 tb = fib_new_table(&init_net, RT_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690
691 if (tb == NULL)
692 return;
693
Thomas Graf4e902c52006-08-17 18:14:52 -0700694 cfg.fc_table = tb->tb_id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695
Thomas Graf4e902c52006-08-17 18:14:52 -0700696 if (type != RTN_LOCAL)
697 cfg.fc_scope = RT_SCOPE_LINK;
698 else
699 cfg.fc_scope = RT_SCOPE_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700
701 if (cmd == RTM_NEWROUTE)
Thomas Graf4e902c52006-08-17 18:14:52 -0700702 tb->tb_insert(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700703 else
Thomas Graf4e902c52006-08-17 18:14:52 -0700704 tb->tb_delete(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705}
706
Jamal Hadi Salim0ff60a42005-11-22 14:47:37 -0800707void fib_add_ifaddr(struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708{
709 struct in_device *in_dev = ifa->ifa_dev;
710 struct net_device *dev = in_dev->dev;
711 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700712 __be32 mask = ifa->ifa_mask;
713 __be32 addr = ifa->ifa_local;
714 __be32 prefix = ifa->ifa_address&mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715
716 if (ifa->ifa_flags&IFA_F_SECONDARY) {
717 prim = inet_ifa_byprefix(in_dev, prefix, mask);
718 if (prim == NULL) {
719 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
720 return;
721 }
722 }
723
724 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
725
726 if (!(dev->flags&IFF_UP))
727 return;
728
729 /* Add broadcast address, if it is explicitly assigned. */
Al Viroa144ea42006-09-28 18:00:55 -0700730 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
732
Joe Perchesf97c1e02007-12-16 13:45:43 -0800733 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734 (prefix != addr || ifa->ifa_prefixlen < 32)) {
735 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
736 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
737
738 /* Add network specific broadcasts, when it takes a sense */
739 if (ifa->ifa_prefixlen < 31) {
740 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
741 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
742 }
743 }
744}
745
746static void fib_del_ifaddr(struct in_ifaddr *ifa)
747{
748 struct in_device *in_dev = ifa->ifa_dev;
749 struct net_device *dev = in_dev->dev;
750 struct in_ifaddr *ifa1;
751 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700752 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
753 __be32 any = ifa->ifa_address&ifa->ifa_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754#define LOCAL_OK 1
755#define BRD_OK 2
756#define BRD0_OK 4
757#define BRD1_OK 8
758 unsigned ok = 0;
759
760 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
761 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
762 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
763 else {
764 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
765 if (prim == NULL) {
766 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
767 return;
768 }
769 }
770
771 /* Deletion is more complicated than add.
772 We should take care of not to delete too much :-)
773
774 Scan address list to be sure that addresses are really gone.
775 */
776
777 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
778 if (ifa->ifa_local == ifa1->ifa_local)
779 ok |= LOCAL_OK;
780 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
781 ok |= BRD_OK;
782 if (brd == ifa1->ifa_broadcast)
783 ok |= BRD1_OK;
784 if (any == ifa1->ifa_broadcast)
785 ok |= BRD0_OK;
786 }
787
788 if (!(ok&BRD_OK))
789 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
790 if (!(ok&BRD1_OK))
791 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
792 if (!(ok&BRD0_OK))
793 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
794 if (!(ok&LOCAL_OK)) {
795 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
796
797 /* Check, that this local address finally disappeared. */
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800798 if (inet_addr_type(&init_net, ifa->ifa_local) != RTN_LOCAL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799 /* And the last, but not the least thing.
800 We must flush stray FIB entries.
801
802 First of all, we scan fib_info list searching
803 for stray nexthop entries, then ignite fib_flush.
804 */
805 if (fib_sync_down(ifa->ifa_local, NULL, 0))
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800806 fib_flush(&init_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807 }
808 }
809#undef LOCAL_OK
810#undef BRD_OK
811#undef BRD0_OK
812#undef BRD1_OK
813}
814
Robert Olsson246955f2005-06-20 13:36:39 -0700815static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
816{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900817
Robert Olsson246955f2005-06-20 13:36:39 -0700818 struct fib_result res;
Thomas Graf5f300892006-11-09 15:21:41 -0800819 struct flowi fl = { .mark = frn->fl_mark,
Thomas Graf47dcf0c2006-11-09 15:20:38 -0800820 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
Robert Olsson246955f2005-06-20 13:36:39 -0700821 .tos = frn->fl_tos,
822 .scope = frn->fl_scope } } };
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700823
Sergey Vlasov912a41a2007-04-27 02:17:19 -0700824#ifdef CONFIG_IP_MULTIPLE_TABLES
825 res.r = NULL;
826#endif
827
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700828 frn->err = -ENOENT;
Robert Olsson246955f2005-06-20 13:36:39 -0700829 if (tb) {
830 local_bh_disable();
831
832 frn->tb_id = tb->tb_id;
833 frn->err = tb->tb_lookup(tb, &fl, &res);
834
835 if (!frn->err) {
836 frn->prefixlen = res.prefixlen;
837 frn->nh_sel = res.nh_sel;
838 frn->type = res.type;
839 frn->scope = res.scope;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700840 fib_res_put(&res);
Robert Olsson246955f2005-06-20 13:36:39 -0700841 }
842 local_bh_enable();
843 }
844}
845
David S. Miller28f7b0362007-10-10 21:32:39 -0700846static void nl_fib_input(struct sk_buff *skb)
Robert Olsson246955f2005-06-20 13:36:39 -0700847{
Robert Olsson246955f2005-06-20 13:36:39 -0700848 struct fib_result_nl *frn;
David S. Miller28f7b0362007-10-10 21:32:39 -0700849 struct nlmsghdr *nlh;
Robert Olsson246955f2005-06-20 13:36:39 -0700850 struct fib_table *tb;
David S. Miller28f7b0362007-10-10 21:32:39 -0700851 u32 pid;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700852
Arnaldo Carvalho de Melob529ccf2007-04-25 19:08:35 -0700853 nlh = nlmsg_hdr(skb);
Thomas Grafea865752005-12-01 14:30:00 -0800854 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
Denis V. Lunevd883a032007-12-21 02:01:53 -0800855 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
Thomas Grafea865752005-12-01 14:30:00 -0800856 return;
Denis V. Lunevd883a032007-12-21 02:01:53 -0800857
858 skb = skb_clone(skb, GFP_KERNEL);
859 if (skb == NULL)
860 return;
861 nlh = nlmsg_hdr(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900862
Robert Olsson246955f2005-06-20 13:36:39 -0700863 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800864 tb = fib_get_table(&init_net, frn->tb_id_in);
Robert Olsson246955f2005-06-20 13:36:39 -0700865
866 nl_fib_lookup(frn, tb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900867
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700868 pid = NETLINK_CB(skb).pid; /* pid of sending process */
Robert Olsson246955f2005-06-20 13:36:39 -0700869 NETLINK_CB(skb).pid = 0; /* from kernel */
Patrick McHardyac6d4392005-08-14 19:29:52 -0700870 NETLINK_CB(skb).dst_group = 0; /* unicast */
Denis V. Lunevcd40b7d2007-10-10 21:15:29 -0700871 netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900872}
Robert Olsson246955f2005-06-20 13:36:39 -0700873
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800874static int nl_fib_lookup_init(struct net *net)
Robert Olsson246955f2005-06-20 13:36:39 -0700875{
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800876 fibnl = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
Denis V. Lunevcd40b7d2007-10-10 21:15:29 -0700877 nl_fib_input, NULL, THIS_MODULE);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800878 if (fibnl == NULL)
879 return -EAFNOSUPPORT;
880 return 0;
881}
882
883static void nl_fib_lookup_exit(struct net *net)
884{
885 sock_put(fibnl);
Robert Olsson246955f2005-06-20 13:36:39 -0700886}
887
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888static void fib_disable_ip(struct net_device *dev, int force)
889{
890 if (fib_sync_down(0, dev, force))
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800891 fib_flush(&init_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700892 rt_cache_flush(0);
893 arp_ifdown(dev);
894}
895
896static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
897{
898 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
899
900 switch (event) {
901 case NETDEV_UP:
902 fib_add_ifaddr(ifa);
903#ifdef CONFIG_IP_ROUTE_MULTIPATH
904 fib_sync_up(ifa->ifa_dev->dev);
905#endif
906 rt_cache_flush(-1);
907 break;
908 case NETDEV_DOWN:
909 fib_del_ifaddr(ifa);
Jayachandran C9fcc2e82005-10-27 15:10:01 -0700910 if (ifa->ifa_dev->ifa_list == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911 /* Last address was deleted from this interface.
912 Disable IP.
913 */
914 fib_disable_ip(ifa->ifa_dev->dev, 1);
915 } else {
916 rt_cache_flush(-1);
917 }
918 break;
919 }
920 return NOTIFY_DONE;
921}
922
923static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
924{
925 struct net_device *dev = ptr;
Herbert Xue5ed6392005-10-03 14:35:55 -0700926 struct in_device *in_dev = __in_dev_get_rtnl(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927
Eric W. Biedermane9dc8652007-09-12 13:02:17 +0200928 if (dev->nd_net != &init_net)
929 return NOTIFY_DONE;
930
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931 if (event == NETDEV_UNREGISTER) {
932 fib_disable_ip(dev, 2);
933 return NOTIFY_DONE;
934 }
935
936 if (!in_dev)
937 return NOTIFY_DONE;
938
939 switch (event) {
940 case NETDEV_UP:
941 for_ifa(in_dev) {
942 fib_add_ifaddr(ifa);
943 } endfor_ifa(in_dev);
944#ifdef CONFIG_IP_ROUTE_MULTIPATH
945 fib_sync_up(dev);
946#endif
947 rt_cache_flush(-1);
948 break;
949 case NETDEV_DOWN:
950 fib_disable_ip(dev, 0);
951 break;
952 case NETDEV_CHANGEMTU:
953 case NETDEV_CHANGE:
954 rt_cache_flush(0);
955 break;
956 }
957 return NOTIFY_DONE;
958}
959
960static struct notifier_block fib_inetaddr_notifier = {
961 .notifier_call =fib_inetaddr_event,
962};
963
964static struct notifier_block fib_netdev_notifier = {
965 .notifier_call =fib_netdev_event,
966};
967
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800968static int __net_init ip_fib_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969{
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700970 unsigned int i;
971
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800972 net->ipv4.fib_table_hash = kzalloc(
973 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
974 if (net->ipv4.fib_table_hash == NULL)
975 return -ENOMEM;
976
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700977 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800978 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -0800979
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800980 return fib4_rules_init(net);
981}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800983static void __net_exit ip_fib_net_exit(struct net *net)
984{
985 unsigned int i;
Thomas Graf63f34442007-03-22 11:55:17 -0700986
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800987#ifdef CONFIG_IP_MULTIPLE_TABLES
988 fib4_rules_exit(net);
989#endif
990
991 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
992 struct fib_table *tb;
993 struct hlist_head *head;
994 struct hlist_node *node, *tmp;
995
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800996 head = &net->ipv4.fib_table_hash[i];
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800997 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
998 hlist_del(node);
999 tb->tb_flush(tb);
1000 kfree(tb);
1001 }
1002 }
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001003 kfree(net->ipv4.fib_table_hash);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001004}
1005
1006static int __net_init fib_net_init(struct net *net)
1007{
1008 int error;
1009
1010 error = 0;
1011 if (net != &init_net)
1012 goto out;
1013
1014 error = ip_fib_net_init(net);
1015 if (error < 0)
1016 goto out;
1017 error = nl_fib_lookup_init(net);
1018 if (error < 0)
1019 goto out_nlfl;
1020 error = fib_proc_init(net);
1021 if (error < 0)
1022 goto out_proc;
1023out:
1024 return error;
1025
1026out_proc:
1027 nl_fib_lookup_exit(net);
1028out_nlfl:
1029 ip_fib_net_exit(net);
1030 goto out;
1031}
1032
1033static void __net_exit fib_net_exit(struct net *net)
1034{
1035 fib_proc_exit(net);
1036 nl_fib_lookup_exit(net);
1037 ip_fib_net_exit(net);
1038}
1039
1040static struct pernet_operations fib_net_ops = {
1041 .init = fib_net_init,
1042 .exit = fib_net_exit,
1043};
1044
1045void __init ip_fib_init(void)
1046{
Thomas Graf63f34442007-03-22 11:55:17 -07001047 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1048 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1049 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001050
1051 register_pernet_subsys(&fib_net_ops);
1052 register_netdevice_notifier(&fib_netdev_notifier);
1053 register_inetaddr_notifier(&fib_inetaddr_notifier);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001054}
1055
/* Symbols from this file made available to other kernel code. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);