blob: 4f1aafd3ba89f92a65d01488a37591c35b0f669c [file] [log] [blame]
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 FIB: lookup engine and maintenance routines.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
15
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <asm/uaccess.h>
17#include <asm/system.h>
18#include <linux/bitops.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/mm.h>
22#include <linux/string.h>
23#include <linux/socket.h>
24#include <linux/sockios.h>
25#include <linux/errno.h>
26#include <linux/in.h>
27#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020028#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <linux/netdevice.h>
30#include <linux/if_arp.h>
31#include <linux/proc_fs.h>
32#include <linux/skbuff.h>
33#include <linux/netlink.h>
34#include <linux/init.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090035#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020037#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
43#include <net/ip_fib.h>
44
45#include "fib_lookup.h"
46
Christoph Lametere18b8902006-12-06 20:33:20 -080047static struct kmem_cache *fn_hash_kmem __read_mostly;
48static struct kmem_cache *fn_alias_kmem __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -070049
50struct fib_node {
51 struct hlist_node fn_hash;
52 struct list_head fn_alias;
Al Virob6e80c62006-09-26 22:20:01 -070053 __be32 fn_key;
Eric Dumazeta6501e02008-01-18 03:33:26 -080054 struct fib_alias fn_embedded_alias;
Linus Torvalds1da177e2005-04-16 15:20:36 -070055};
56
/* Bucket count of the hash table embedded in each zone: one cache line worth. */
#define EMBEDDED_HASH_SIZE (L1_CACHE_BYTES / sizeof(struct hlist_head))
58
Linus Torvalds1da177e2005-04-16 15:20:36 -070059struct fn_zone {
Eric Dumazet117a8cd2010-10-14 20:53:34 +000060 struct fn_zone __rcu *fz_next; /* Next not empty zone */
Eric Dumazet19f57252010-10-14 20:56:39 +000061 struct hlist_head __rcu *fz_hash; /* Hash table pointer */
62 seqlock_t fz_lock;
Linus Torvalds1da177e2005-04-16 15:20:36 -070063 u32 fz_hashmask; /* (fz_divisor - 1) */
Linus Torvalds1da177e2005-04-16 15:20:36 -070064
Eric Dumazet9bef83e2010-10-14 20:53:04 +000065 u8 fz_order; /* Zone order (0..32) */
66 u8 fz_revorder; /* 32 - fz_order */
67 __be32 fz_mask; /* inet_make_mask(order) */
Linus Torvalds1da177e2005-04-16 15:20:36 -070068#define FZ_MASK(fz) ((fz)->fz_mask)
Linus Torvalds1da177e2005-04-16 15:20:36 -070069
Eric Dumazet9bef83e2010-10-14 20:53:04 +000070 struct hlist_head fz_embedded_hash[EMBEDDED_HASH_SIZE];
71
72 int fz_nent; /* Number of entries */
73 int fz_divisor; /* Hash size (mask+1) */
74};
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
76struct fn_hash {
Eric Dumazet117a8cd2010-10-14 20:53:34 +000077 struct fn_zone *fn_zones[33];
78 struct fn_zone __rcu *fn_zone_list;
Linus Torvalds1da177e2005-04-16 15:20:36 -070079};
80
Al Virob6e80c62006-09-26 22:20:01 -070081static inline u32 fn_hash(__be32 key, struct fn_zone *fz)
Linus Torvalds1da177e2005-04-16 15:20:36 -070082{
Eric Dumazet9bef83e2010-10-14 20:53:04 +000083 u32 h = ntohl(key) >> fz->fz_revorder;
Linus Torvalds1da177e2005-04-16 15:20:36 -070084 h ^= (h>>20);
85 h ^= (h>>10);
86 h ^= (h>>5);
Eric Dumazet9bef83e2010-10-14 20:53:04 +000087 h &= fz->fz_hashmask;
Linus Torvalds1da177e2005-04-16 15:20:36 -070088 return h;
89}
90
Al Virob6e80c62006-09-26 22:20:01 -070091static inline __be32 fz_key(__be32 dst, struct fn_zone *fz)
Linus Torvalds1da177e2005-04-16 15:20:36 -070092{
93 return dst & FZ_MASK(fz);
94}
95
/* Incremented on every structural change made to the hash tables in this file. */
static unsigned int fib_hash_genid;

/* Largest bucket count a zone hash table is allowed to grow to. */
#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head))
99
100static struct hlist_head *fz_hash_alloc(int divisor)
101{
102 unsigned long size = divisor * sizeof(struct hlist_head);
103
Eric Dumazet19f57252010-10-14 20:56:39 +0000104 if (size <= PAGE_SIZE)
Joonwoo Park3015a342007-11-26 23:31:24 +0800105 return kzalloc(size, GFP_KERNEL);
Eric Dumazet19f57252010-10-14 20:56:39 +0000106
107 return (struct hlist_head *)
108 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109}
110
111/* The fib hash lock must be held when this is called. */
112static inline void fn_rebuild_zone(struct fn_zone *fz,
113 struct hlist_head *old_ht,
114 int old_divisor)
115{
116 int i;
117
118 for (i = 0; i < old_divisor; i++) {
119 struct hlist_node *node, *n;
120 struct fib_node *f;
121
122 hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
Eric Dumazet19f57252010-10-14 20:56:39 +0000123 struct hlist_head __rcu *new_head;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124
Eric Dumazet19f57252010-10-14 20:56:39 +0000125 hlist_del_rcu(&f->fn_hash);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700126
127 new_head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
Eric Dumazet19f57252010-10-14 20:56:39 +0000128 hlist_add_head_rcu(&f->fn_hash, new_head);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129 }
130 }
131}
132
133static void fz_hash_free(struct hlist_head *hash, int divisor)
134{
135 unsigned long size = divisor * sizeof(struct hlist_head);
136
137 if (size <= PAGE_SIZE)
138 kfree(hash);
139 else
140 free_pages((unsigned long)hash, get_order(size));
141}
142
143static void fn_rehash_zone(struct fn_zone *fz)
144{
145 struct hlist_head *ht, *old_ht;
146 int old_divisor, new_divisor;
147 u32 new_hashmask;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900148
Eric Dumazet9bef83e2010-10-14 20:53:04 +0000149 new_divisor = old_divisor = fz->fz_divisor;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150
151 switch (old_divisor) {
Eric Dumazet9bef83e2010-10-14 20:53:04 +0000152 case EMBEDDED_HASH_SIZE:
153 new_divisor *= EMBEDDED_HASH_SIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700154 break;
Eric Dumazet9bef83e2010-10-14 20:53:04 +0000155 case EMBEDDED_HASH_SIZE*EMBEDDED_HASH_SIZE:
156 new_divisor *= (EMBEDDED_HASH_SIZE/2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157 break;
158 default:
159 if ((old_divisor << 1) > FZ_MAX_DIVISOR) {
160 printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor);
161 return;
162 }
163 new_divisor = (old_divisor << 1);
164 break;
165 }
166
167 new_hashmask = (new_divisor - 1);
168
169#if RT_CACHE_DEBUG >= 2
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800170 printk(KERN_DEBUG "fn_rehash_zone: hash for zone %d grows from %d\n",
171 fz->fz_order, old_divisor);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172#endif
173
174 ht = fz_hash_alloc(new_divisor);
175
176 if (ht) {
Eric Dumazet19f57252010-10-14 20:56:39 +0000177 struct fn_zone nfz;
178
179 memcpy(&nfz, fz, sizeof(nfz));
180
181 write_seqlock_bh(&fz->fz_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182 old_ht = fz->fz_hash;
Eric Dumazet19f57252010-10-14 20:56:39 +0000183 nfz.fz_hash = ht;
184 nfz.fz_hashmask = new_hashmask;
185 nfz.fz_divisor = new_divisor;
186 fn_rebuild_zone(&nfz, old_ht, old_divisor);
187 fib_hash_genid++;
188 rcu_assign_pointer(fz->fz_hash, ht);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189 fz->fz_hashmask = new_hashmask;
190 fz->fz_divisor = new_divisor;
Eric Dumazet19f57252010-10-14 20:56:39 +0000191 write_sequnlock_bh(&fz->fz_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700192
Eric Dumazet19f57252010-10-14 20:56:39 +0000193 if (old_ht != fz->fz_embedded_hash) {
194 synchronize_rcu();
Eric Dumazet9bef83e2010-10-14 20:53:04 +0000195 fz_hash_free(old_ht, old_divisor);
Eric Dumazet19f57252010-10-14 20:56:39 +0000196 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197 }
198}
199
Eric Dumazet19f57252010-10-14 20:56:39 +0000200static void fn_free_node_rcu(struct rcu_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201{
Eric Dumazet19f57252010-10-14 20:56:39 +0000202 struct fib_node *f = container_of(head, struct fib_node, fn_embedded_alias.rcu);
203
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204 kmem_cache_free(fn_hash_kmem, f);
205}
206
Eric Dumazet19f57252010-10-14 20:56:39 +0000207static inline void fn_free_node(struct fib_node *f)
208{
209 call_rcu(&f->fn_embedded_alias.rcu, fn_free_node_rcu);
210}
211
212static void fn_free_alias_rcu(struct rcu_head *head)
213{
214 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
215
216 kmem_cache_free(fn_alias_kmem, fa);
217}
218
Eric Dumazeta6501e02008-01-18 03:33:26 -0800219static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220{
221 fib_release_info(fa->fa_info);
Eric Dumazeta6501e02008-01-18 03:33:26 -0800222 if (fa == &f->fn_embedded_alias)
223 fa->fa_info = NULL;
224 else
Eric Dumazet19f57252010-10-14 20:56:39 +0000225 call_rcu(&fa->rcu, fn_free_alias_rcu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226}
227
228static struct fn_zone *
229fn_new_zone(struct fn_hash *table, int z)
230{
231 int i;
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700232 struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233 if (!fz)
234 return NULL;
235
Eric Dumazet19f57252010-10-14 20:56:39 +0000236 seqlock_init(&fz->fz_lock);
Eric Dumazet9bef83e2010-10-14 20:53:04 +0000237 fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1;
238 fz->fz_hashmask = fz->fz_divisor - 1;
239 fz->fz_hash = fz->fz_embedded_hash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240 fz->fz_order = z;
Eric Dumazet9bef83e2010-10-14 20:53:04 +0000241 fz->fz_revorder = 32 - z;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242 fz->fz_mask = inet_make_mask(z);
243
244 /* Find the first not empty zone with more specific mask */
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000245 for (i = z + 1; i <= 32; i++)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 if (table->fn_zones[i])
247 break;
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000248 if (i > 32) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249 /* No more specific masks, we are the first. */
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000250 rcu_assign_pointer(fz->fz_next,
251 rtnl_dereference(table->fn_zone_list));
252 rcu_assign_pointer(table->fn_zone_list, fz);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253 } else {
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000254 rcu_assign_pointer(fz->fz_next,
255 rtnl_dereference(table->fn_zones[i]->fz_next));
256 rcu_assign_pointer(table->fn_zones[i]->fz_next, fz);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257 }
258 table->fn_zones[z] = fz;
259 fib_hash_genid++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 return fz;
261}
262
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000263int fib_table_lookup(struct fib_table *tb,
Eric Dumazetebc0ffa2010-10-05 10:41:36 +0000264 const struct flowi *flp, struct fib_result *res,
265 int fib_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700266{
267 int err;
268 struct fn_zone *fz;
Jianjun Kong6ed25332008-11-03 00:25:16 -0800269 struct fn_hash *t = (struct fn_hash *)tb->tb_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700270
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000271 rcu_read_lock();
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000272 for (fz = rcu_dereference(t->fn_zone_list);
273 fz != NULL;
274 fz = rcu_dereference(fz->fz_next)) {
Eric Dumazet19f57252010-10-14 20:56:39 +0000275 struct hlist_head __rcu *head;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276 struct hlist_node *node;
277 struct fib_node *f;
Eric Dumazet19f57252010-10-14 20:56:39 +0000278 __be32 k;
279 unsigned int seq;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280
Eric Dumazet19f57252010-10-14 20:56:39 +0000281 do {
282 seq = read_seqbegin(&fz->fz_lock);
283 k = fz_key(flp->fl4_dst, fz);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284
Eric Dumazet19f57252010-10-14 20:56:39 +0000285 head = &fz->fz_hash[fn_hash(k, fz)];
286 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
287 if (f->fn_key != k)
288 continue;
289
290 err = fib_semantic_match(&f->fn_alias,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 flp, res,
Eric Dumazetebc0ffa2010-10-05 10:41:36 +0000292 fz->fz_order, fib_flags);
Eric Dumazet19f57252010-10-14 20:56:39 +0000293 if (err <= 0)
294 goto out;
295 }
296 } while (read_seqretry(&fz->fz_lock, seq));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700297 }
298 err = 1;
299out:
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000300 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700301 return err;
302}
303
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000304void fib_table_select_default(struct fib_table *tb,
305 const struct flowi *flp, struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306{
307 int order, last_idx;
308 struct hlist_node *node;
309 struct fib_node *f;
310 struct fib_info *fi = NULL;
311 struct fib_info *last_resort;
Jianjun Kong6ed25332008-11-03 00:25:16 -0800312 struct fn_hash *t = (struct fn_hash *)tb->tb_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 struct fn_zone *fz = t->fn_zones[0];
314
315 if (fz == NULL)
316 return;
317
318 last_idx = -1;
319 last_resort = NULL;
320 order = -1;
321
Eric Dumazet19f57252010-10-14 20:56:39 +0000322 rcu_read_lock();
323 hlist_for_each_entry_rcu(f, node, &fz->fz_hash[0], fn_hash) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324 struct fib_alias *fa;
325
Eric Dumazet19f57252010-10-14 20:56:39 +0000326 list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327 struct fib_info *next_fi = fa->fa_info;
328
329 if (fa->fa_scope != res->scope ||
330 fa->fa_type != RTN_UNICAST)
331 continue;
332
333 if (next_fi->fib_priority > res->fi->fib_priority)
334 break;
335 if (!next_fi->fib_nh[0].nh_gw ||
336 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
337 continue;
338 fa->fa_state |= FA_S_ACCESSED;
339
340 if (fi == NULL) {
341 if (next_fi != res->fi)
342 break;
343 } else if (!fib_detect_death(fi, order, &last_resort,
Denis V. Lunev971b8932007-12-08 00:32:23 -0800344 &last_idx, tb->tb_default)) {
Denis V. Luneva2bbe682007-12-08 00:31:44 -0800345 fib_result_assign(res, fi);
Denis V. Lunev971b8932007-12-08 00:32:23 -0800346 tb->tb_default = order;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700347 goto out;
348 }
349 fi = next_fi;
350 order++;
351 }
352 }
353
354 if (order <= 0 || fi == NULL) {
Denis V. Lunev971b8932007-12-08 00:32:23 -0800355 tb->tb_default = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700356 goto out;
357 }
358
Denis V. Lunev971b8932007-12-08 00:32:23 -0800359 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
360 tb->tb_default)) {
Denis V. Luneva2bbe682007-12-08 00:31:44 -0800361 fib_result_assign(res, fi);
Denis V. Lunev971b8932007-12-08 00:32:23 -0800362 tb->tb_default = order;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363 goto out;
364 }
365
Denis V. Luneva2bbe682007-12-08 00:31:44 -0800366 if (last_idx >= 0)
367 fib_result_assign(res, last_resort);
Denis V. Lunev971b8932007-12-08 00:32:23 -0800368 tb->tb_default = last_idx;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369out:
Eric Dumazet19f57252010-10-14 20:56:39 +0000370 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371}
372
373/* Insert node F to FZ. */
374static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
375{
376 struct hlist_head *head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
377
Eric Dumazet19f57252010-10-14 20:56:39 +0000378 hlist_add_head_rcu(&f->fn_hash, head);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379}
380
381/* Return the node in FZ matching KEY. */
Al Virob6e80c62006-09-26 22:20:01 -0700382static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383{
384 struct hlist_head *head = &fz->fz_hash[fn_hash(key, fz)];
385 struct hlist_node *node;
386 struct fib_node *f;
387
Eric Dumazet19f57252010-10-14 20:56:39 +0000388 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389 if (f->fn_key == key)
390 return f;
391 }
392
393 return NULL;
394}
395
Eric Dumazet19f57252010-10-14 20:56:39 +0000396
397static struct fib_alias *fib_fast_alloc(struct fib_node *f)
398{
399 struct fib_alias *fa = &f->fn_embedded_alias;
400
401 if (fa->fa_info != NULL)
402 fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
403 return fa;
404}
405
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000406/* Caller must hold RTNL. */
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000407int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408{
409 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
Adrian Bunk94cb1502008-02-19 16:28:54 -0800410 struct fib_node *new_f = NULL;
411 struct fib_node *f;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 struct fib_alias *fa, *new_fa;
413 struct fn_zone *fz;
414 struct fib_info *fi;
Thomas Graf4e902c52006-08-17 18:14:52 -0700415 u8 tos = cfg->fc_tos;
Al Virob6e80c62006-09-26 22:20:01 -0700416 __be32 key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417 int err;
418
Thomas Graf4e902c52006-08-17 18:14:52 -0700419 if (cfg->fc_dst_len > 32)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700421
422 fz = table->fn_zones[cfg->fc_dst_len];
423 if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424 return -ENOBUFS;
425
426 key = 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700427 if (cfg->fc_dst) {
428 if (cfg->fc_dst & ~FZ_MASK(fz))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700430 key = fz_key(cfg->fc_dst, fz);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431 }
432
Thomas Graf4e902c52006-08-17 18:14:52 -0700433 fi = fib_create_info(cfg);
434 if (IS_ERR(fi))
435 return PTR_ERR(fi);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436
437 if (fz->fz_nent > (fz->fz_divisor<<1) &&
438 fz->fz_divisor < FZ_MAX_DIVISOR &&
Thomas Graf4e902c52006-08-17 18:14:52 -0700439 (cfg->fc_dst_len == 32 ||
440 (1 << cfg->fc_dst_len) > fz->fz_divisor))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441 fn_rehash_zone(fz);
442
443 f = fib_find_node(fz, key);
444
445 if (!f)
446 fa = NULL;
447 else
448 fa = fib_find_alias(&f->fn_alias, tos, fi->fib_priority);
449
450 /* Now fa, if non-NULL, points to the first fib alias
451 * with the same keys [prefix,tos,priority], if such key already
452 * exists or to the node before which we will insert new one.
453 *
454 * If fa is NULL, we will need to allocate a new one and
455 * insert to the head of f.
456 *
457 * If f is NULL, no fib node matched the destination key
458 * and we need to allocate a new one of those as well.
459 */
460
461 if (fa && fa->fa_tos == tos &&
462 fa->fa_info->fib_priority == fi->fib_priority) {
Julian Anastasovc18865f2008-01-28 21:14:10 -0800463 struct fib_alias *fa_first, *fa_match;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700464
465 err = -EEXIST;
Thomas Graf4e902c52006-08-17 18:14:52 -0700466 if (cfg->fc_nlflags & NLM_F_EXCL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467 goto out;
468
Julian Anastasovc18865f2008-01-28 21:14:10 -0800469 /* We have 2 goals:
470 * 1. Find exact match for type, scope, fib_info to avoid
471 * duplicate routes
472 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
473 */
474 fa_match = NULL;
475 fa_first = fa;
476 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
477 list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
478 if (fa->fa_tos != tos)
479 break;
480 if (fa->fa_info->fib_priority != fi->fib_priority)
481 break;
482 if (fa->fa_type == cfg->fc_type &&
483 fa->fa_scope == cfg->fc_scope &&
484 fa->fa_info == fi) {
485 fa_match = fa;
486 break;
487 }
488 }
489
Thomas Graf4e902c52006-08-17 18:14:52 -0700490 if (cfg->fc_nlflags & NLM_F_REPLACE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491 u8 state;
492
Julian Anastasovc18865f2008-01-28 21:14:10 -0800493 fa = fa_first;
494 if (fa_match) {
495 if (fa == fa_match)
496 err = 0;
Joonwoo Parkbd566e72008-01-18 03:44:48 -0800497 goto out;
Julian Anastasovc18865f2008-01-28 21:14:10 -0800498 }
Eric Dumazet19f57252010-10-14 20:56:39 +0000499 err = -ENOBUFS;
500 new_fa = fib_fast_alloc(f);
501 if (new_fa == NULL)
502 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503
Eric Dumazet19f57252010-10-14 20:56:39 +0000504 new_fa->fa_tos = fa->fa_tos;
505 new_fa->fa_info = fi;
506 new_fa->fa_type = cfg->fc_type;
507 new_fa->fa_scope = cfg->fc_scope;
508 state = fa->fa_state;
509 new_fa->fa_state = state & ~FA_S_ACCESSED;
510 fib_hash_genid++;
511 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
512
513 fn_free_alias(fa, f);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514 if (state & FA_S_ACCESSED)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700515 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Eric Dumazet19f57252010-10-14 20:56:39 +0000516 rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len,
517 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518 return 0;
519 }
520
521 /* Error if we find a perfect match which
522 * uses the same scope, type, and nexthop
523 * information.
524 */
Julian Anastasovc18865f2008-01-28 21:14:10 -0800525 if (fa_match)
526 goto out;
527
Thomas Graf4e902c52006-08-17 18:14:52 -0700528 if (!(cfg->fc_nlflags & NLM_F_APPEND))
Julian Anastasovc18865f2008-01-28 21:14:10 -0800529 fa = fa_first;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700530 }
531
532 err = -ENOENT;
Thomas Graf4e902c52006-08-17 18:14:52 -0700533 if (!(cfg->fc_nlflags & NLM_F_CREATE))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534 goto out;
535
536 err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538 if (!f) {
Eric Dumazeta6501e02008-01-18 03:33:26 -0800539 new_f = kmem_cache_zalloc(fn_hash_kmem, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540 if (new_f == NULL)
Eric Dumazeta6501e02008-01-18 03:33:26 -0800541 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542
543 INIT_HLIST_NODE(&new_f->fn_hash);
544 INIT_LIST_HEAD(&new_f->fn_alias);
545 new_f->fn_key = key;
546 f = new_f;
547 }
548
Eric Dumazet19f57252010-10-14 20:56:39 +0000549 new_fa = fib_fast_alloc(f);
550 if (new_fa == NULL)
551 goto out;
552
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553 new_fa->fa_info = fi;
554 new_fa->fa_tos = tos;
Thomas Graf4e902c52006-08-17 18:14:52 -0700555 new_fa->fa_type = cfg->fc_type;
556 new_fa->fa_scope = cfg->fc_scope;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557 new_fa->fa_state = 0;
558
559 /*
560 * Insert new entry to the list.
561 */
562
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563 if (new_f)
564 fib_insert_node(fz, new_f);
Eric Dumazet19f57252010-10-14 20:56:39 +0000565 list_add_tail_rcu(&new_fa->fa_list,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 (fa ? &fa->fa_list : &f->fn_alias));
567 fib_hash_genid++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700568
569 if (new_f)
570 fz->fz_nent++;
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700571 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572
Thomas Graf4e902c52006-08-17 18:14:52 -0700573 rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -0700574 &cfg->fc_nlinfo, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575 return 0;
576
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577out:
Adrian Bunk94cb1502008-02-19 16:28:54 -0800578 if (new_f)
579 kmem_cache_free(fn_hash_kmem, new_f);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700580 fib_release_info(fi);
581 return err;
582}
583
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000584int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585{
Jianjun Kong6ed25332008-11-03 00:25:16 -0800586 struct fn_hash *table = (struct fn_hash *)tb->tb_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587 struct fib_node *f;
588 struct fib_alias *fa, *fa_to_delete;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589 struct fn_zone *fz;
Al Virob6e80c62006-09-26 22:20:01 -0700590 __be32 key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591
Thomas Graf4e902c52006-08-17 18:14:52 -0700592 if (cfg->fc_dst_len > 32)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700594
595 if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596 return -ESRCH;
597
598 key = 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700599 if (cfg->fc_dst) {
600 if (cfg->fc_dst & ~FZ_MASK(fz))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700602 key = fz_key(cfg->fc_dst, fz);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603 }
604
605 f = fib_find_node(fz, key);
606
607 if (!f)
608 fa = NULL;
609 else
Thomas Graf4e902c52006-08-17 18:14:52 -0700610 fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611 if (!fa)
612 return -ESRCH;
613
614 fa_to_delete = NULL;
615 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
616 list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
617 struct fib_info *fi = fa->fa_info;
618
Thomas Graf4e902c52006-08-17 18:14:52 -0700619 if (fa->fa_tos != cfg->fc_tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 break;
621
Thomas Graf4e902c52006-08-17 18:14:52 -0700622 if ((!cfg->fc_type ||
623 fa->fa_type == cfg->fc_type) &&
624 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
625 fa->fa_scope == cfg->fc_scope) &&
626 (!cfg->fc_protocol ||
627 fi->fib_protocol == cfg->fc_protocol) &&
628 fib_nh_match(cfg, fi) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629 fa_to_delete = fa;
630 break;
631 }
632 }
633
634 if (fa_to_delete) {
635 int kill_fn;
636
637 fa = fa_to_delete;
Thomas Graf4e902c52006-08-17 18:14:52 -0700638 rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len,
Milan Kocianb8f55832007-05-23 14:55:06 -0700639 tb->tb_id, &cfg->fc_nlinfo, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640
641 kill_fn = 0;
Eric Dumazet19f57252010-10-14 20:56:39 +0000642 list_del_rcu(&fa->fa_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643 if (list_empty(&f->fn_alias)) {
Eric Dumazet19f57252010-10-14 20:56:39 +0000644 hlist_del_rcu(&f->fn_hash);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645 kill_fn = 1;
646 }
647 fib_hash_genid++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648
649 if (fa->fa_state & FA_S_ACCESSED)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700650 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Eric Dumazeta6501e02008-01-18 03:33:26 -0800651 fn_free_alias(fa, f);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652 if (kill_fn) {
653 fn_free_node(f);
654 fz->fz_nent--;
655 }
656
657 return 0;
658 }
659 return -ESRCH;
660}
661
662static int fn_flush_list(struct fn_zone *fz, int idx)
663{
664 struct hlist_head *head = &fz->fz_hash[idx];
665 struct hlist_node *node, *n;
666 struct fib_node *f;
667 int found = 0;
668
669 hlist_for_each_entry_safe(f, node, n, head, fn_hash) {
670 struct fib_alias *fa, *fa_node;
671 int kill_f;
672
673 kill_f = 0;
674 list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) {
675 struct fib_info *fi = fa->fa_info;
676
677 if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
Eric Dumazet19f57252010-10-14 20:56:39 +0000678 list_del_rcu(&fa->fa_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679 if (list_empty(&f->fn_alias)) {
Eric Dumazet19f57252010-10-14 20:56:39 +0000680 hlist_del_rcu(&f->fn_hash);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681 kill_f = 1;
682 }
683 fib_hash_genid++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684
Eric Dumazeta6501e02008-01-18 03:33:26 -0800685 fn_free_alias(fa, f);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686 found++;
687 }
688 }
689 if (kill_f) {
690 fn_free_node(f);
691 fz->fz_nent--;
692 }
693 }
694 return found;
695}
696
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000697/* caller must hold RTNL. */
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000698int fib_table_flush(struct fib_table *tb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699{
700 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
701 struct fn_zone *fz;
702 int found = 0;
703
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000704 for (fz = rtnl_dereference(table->fn_zone_list);
705 fz != NULL;
706 fz = rtnl_dereference(fz->fz_next)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707 int i;
708
709 for (i = fz->fz_divisor - 1; i >= 0; i--)
710 found += fn_flush_list(fz, i);
711 }
712 return found;
713}
714
715
716static inline int
717fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
718 struct fib_table *tb,
719 struct fn_zone *fz,
720 struct hlist_head *head)
721{
722 struct hlist_node *node;
723 struct fib_node *f;
724 int i, s_i;
725
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700726 s_i = cb->args[4];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727 i = 0;
Eric Dumazet19f57252010-10-14 20:56:39 +0000728 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729 struct fib_alias *fa;
730
Eric Dumazet19f57252010-10-14 20:56:39 +0000731 list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732 if (i < s_i)
733 goto next;
734
735 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
736 cb->nlh->nlmsg_seq,
737 RTM_NEWROUTE,
738 tb->tb_id,
739 fa->fa_type,
740 fa->fa_scope,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700741 f->fn_key,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742 fz->fz_order,
743 fa->fa_tos,
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -0700744 fa->fa_info,
745 NLM_F_MULTI) < 0) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700746 cb->args[4] = i;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747 return -1;
748 }
Eric Dumazet19f57252010-10-14 20:56:39 +0000749next:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 i++;
751 }
752 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700753 cb->args[4] = i;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754 return skb->len;
755}
756
757static inline int
758fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
759 struct fib_table *tb,
760 struct fn_zone *fz)
761{
762 int h, s_h;
763
Eric Dumazet8d3f0992008-01-18 04:30:21 -0800764 if (fz->fz_hash == NULL)
765 return skb->len;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700766 s_h = cb->args[3];
Eric Dumazet8d3f0992008-01-18 04:30:21 -0800767 for (h = s_h; h < fz->fz_divisor; h++) {
768 if (hlist_empty(&fz->fz_hash[h]))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769 continue;
Eric Dumazet8d3f0992008-01-18 04:30:21 -0800770 if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h]) < 0) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700771 cb->args[3] = h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772 return -1;
773 }
Eric Dumazet8d3f0992008-01-18 04:30:21 -0800774 memset(&cb->args[4], 0,
775 sizeof(cb->args) - 4*sizeof(cb->args[0]));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700777 cb->args[3] = h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778 return skb->len;
779}
780
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000781int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
782 struct netlink_callback *cb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783{
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000784 int m = 0, s_m;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785 struct fn_zone *fz;
Jianjun Kong6ed25332008-11-03 00:25:16 -0800786 struct fn_hash *table = (struct fn_hash *)tb->tb_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700788 s_m = cb->args[2];
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000789 rcu_read_lock();
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000790 for (fz = rcu_dereference(table->fn_zone_list);
791 fz != NULL;
792 fz = rcu_dereference(fz->fz_next), m++) {
793 if (m < s_m)
794 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795 if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700796 cb->args[2] = m;
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000797 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798 return -1;
799 }
Eric Dumazet8d3f0992008-01-18 04:30:21 -0800800 memset(&cb->args[3], 0,
801 sizeof(cb->args) - 3*sizeof(cb->args[0]));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802 }
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000803 rcu_read_unlock();
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700804 cb->args[2] = m;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805 return skb->len;
806}
807
Stephen Hemminger7f9b8052008-01-14 23:14:20 -0800808void __init fib_hash_init(void)
809{
810 fn_hash_kmem = kmem_cache_create("ip_fib_hash", sizeof(struct fib_node),
Eric Dumazeta6501e02008-01-18 03:33:26 -0800811 0, SLAB_PANIC, NULL);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -0800812
813 fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
Eric Dumazeta6501e02008-01-18 03:33:26 -0800814 0, SLAB_PANIC, NULL);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -0800815
816}
817
818struct fib_table *fib_hash_table(u32 id)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700819{
820 struct fib_table *tb;
821
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash),
823 GFP_KERNEL);
824 if (tb == NULL)
825 return NULL;
826
827 tb->tb_id = id;
Denis V. Lunev971b8932007-12-08 00:32:23 -0800828 tb->tb_default = -1;
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000829
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830 memset(tb->tb_data, 0, sizeof(struct fn_hash));
831 return tb;
832}
833
834/* ------------------------------------------------------------------------ */
835#ifdef CONFIG_PROC_FS
836
837struct fib_iter_state {
Denis V. Lunev6e04d012008-01-10 03:26:50 -0800838 struct seq_net_private p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839 struct fn_zone *zone;
840 int bucket;
841 struct hlist_head *hash_head;
842 struct fib_node *fn;
843 struct fib_alias *fa;
844 loff_t pos;
845 unsigned int genid;
846 int valid;
847};
848
849static struct fib_alias *fib_get_first(struct seq_file *seq)
850{
851 struct fib_iter_state *iter = seq->private;
Denis V. Lunev6e04d012008-01-10 03:26:50 -0800852 struct fib_table *main_table;
853 struct fn_hash *table;
854
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +0900855 main_table = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
Denis V. Lunev6e04d012008-01-10 03:26:50 -0800856 table = (struct fn_hash *)main_table->tb_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700857
858 iter->bucket = 0;
859 iter->hash_head = NULL;
860 iter->fn = NULL;
861 iter->fa = NULL;
862 iter->pos = 0;
863 iter->genid = fib_hash_genid;
864 iter->valid = 1;
865
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000866 for (iter->zone = rcu_dereference(table->fn_zone_list);
867 iter->zone != NULL;
868 iter->zone = rcu_dereference(iter->zone->fz_next)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869 int maxslot;
870
871 if (!iter->zone->fz_nent)
872 continue;
873
874 iter->hash_head = iter->zone->fz_hash;
875 maxslot = iter->zone->fz_divisor;
876
877 for (iter->bucket = 0; iter->bucket < maxslot;
878 ++iter->bucket, ++iter->hash_head) {
879 struct hlist_node *node;
880 struct fib_node *fn;
881
Jianjun Kong6ed25332008-11-03 00:25:16 -0800882 hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883 struct fib_alias *fa;
884
Jianjun Kong6ed25332008-11-03 00:25:16 -0800885 list_for_each_entry(fa, &fn->fn_alias, fa_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886 iter->fn = fn;
887 iter->fa = fa;
888 goto out;
889 }
890 }
891 }
892 }
893out:
894 return iter->fa;
895}
896
897static struct fib_alias *fib_get_next(struct seq_file *seq)
898{
899 struct fib_iter_state *iter = seq->private;
900 struct fib_node *fn;
901 struct fib_alias *fa;
902
903 /* Advance FA, if any. */
904 fn = iter->fn;
905 fa = iter->fa;
906 if (fa) {
907 BUG_ON(!fn);
908 list_for_each_entry_continue(fa, &fn->fn_alias, fa_list) {
909 iter->fa = fa;
910 goto out;
911 }
912 }
913
914 fa = iter->fa = NULL;
915
916 /* Advance FN. */
917 if (fn) {
918 struct hlist_node *node = &fn->fn_hash;
919 hlist_for_each_entry_continue(fn, node, fn_hash) {
920 iter->fn = fn;
921
922 list_for_each_entry(fa, &fn->fn_alias, fa_list) {
923 iter->fa = fa;
924 goto out;
925 }
926 }
927 }
928
929 fn = iter->fn = NULL;
930
931 /* Advance hash chain. */
932 if (!iter->zone)
933 goto out;
934
935 for (;;) {
936 struct hlist_node *node;
937 int maxslot;
938
939 maxslot = iter->zone->fz_divisor;
940
941 while (++iter->bucket < maxslot) {
942 iter->hash_head++;
943
944 hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
945 list_for_each_entry(fa, &fn->fn_alias, fa_list) {
946 iter->fn = fn;
947 iter->fa = fa;
948 goto out;
949 }
950 }
951 }
952
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000953 iter->zone = rcu_dereference(iter->zone->fz_next);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954
955 if (!iter->zone)
956 goto out;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900957
Linus Torvalds1da177e2005-04-16 15:20:36 -0700958 iter->bucket = 0;
959 iter->hash_head = iter->zone->fz_hash;
960
961 hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
962 list_for_each_entry(fa, &fn->fn_alias, fa_list) {
963 iter->fn = fn;
964 iter->fa = fa;
965 goto out;
966 }
967 }
968 }
969out:
970 iter->pos++;
971 return fa;
972}
973
974static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
975{
976 struct fib_iter_state *iter = seq->private;
977 struct fib_alias *fa;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900978
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979 if (iter->valid && pos >= iter->pos && iter->genid == fib_hash_genid) {
980 fa = iter->fa;
981 pos -= iter->pos;
982 } else
983 fa = fib_get_first(seq);
984
985 if (fa)
986 while (pos && (fa = fib_get_next(seq)))
987 --pos;
988 return pos ? NULL : fa;
989}
990
991static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000992 __acquires(RCU)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993{
994 void *v = NULL;
995
Eric Dumazet117a8cd2010-10-14 20:53:34 +0000996 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +0900997 if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998 v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
999 return v;
1000}
1001
1002static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1003{
1004 ++*pos;
1005 return v == SEQ_START_TOKEN ? fib_get_first(seq) : fib_get_next(seq);
1006}
1007
1008static void fib_seq_stop(struct seq_file *seq, void *v)
Eric Dumazet117a8cd2010-10-14 20:53:34 +00001009 __releases(RCU)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010{
Eric Dumazet117a8cd2010-10-14 20:53:34 +00001011 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012}
1013
Al Virob6e80c62006-09-26 22:20:01 -07001014static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015{
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -08001016 static const unsigned type2flags[RTN_MAX + 1] = {
Eric Dumazet19f57252010-10-14 20:56:39 +00001017 [7] = RTF_REJECT,
1018 [8] = RTF_REJECT,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 };
1020 unsigned flags = type2flags[type];
1021
1022 if (fi && fi->fib_nh->nh_gw)
1023 flags |= RTF_GATEWAY;
Al Virob6e80c62006-09-26 22:20:01 -07001024 if (mask == htonl(0xFFFFFFFF))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001025 flags |= RTF_HOST;
1026 flags |= RTF_UP;
1027 return flags;
1028}
1029
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001030/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031 * This outputs /proc/net/route.
1032 *
1033 * It always works in backward compatibility mode.
1034 * The format of the file is not supposed to be changed.
1035 */
1036static int fib_seq_show(struct seq_file *seq, void *v)
1037{
1038 struct fib_iter_state *iter;
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07001039 int len;
Al Virob6e80c62006-09-26 22:20:01 -07001040 __be32 prefix, mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041 unsigned flags;
1042 struct fib_node *f;
1043 struct fib_alias *fa;
1044 struct fib_info *fi;
1045
1046 if (v == SEQ_START_TOKEN) {
1047 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
1048 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
1049 "\tWindow\tIRTT");
1050 goto out;
1051 }
1052
1053 iter = seq->private;
1054 f = iter->fn;
1055 fa = iter->fa;
1056 fi = fa->fa_info;
1057 prefix = f->fn_key;
1058 mask = FZ_MASK(iter->zone);
1059 flags = fib_flag_trans(fa->fa_type, mask, fi);
1060 if (fi)
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07001061 seq_printf(seq,
1062 "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063 fi->fib_dev ? fi->fib_dev->name : "*", prefix,
1064 fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority,
1065 mask, (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
1066 fi->fib_window,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07001067 fi->fib_rtt >> 3, &len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068 else
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07001069 seq_printf(seq,
1070 "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n",
1071 prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0, &len);
1072
1073 seq_printf(seq, "%*s\n", 127 - len, "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001074out:
1075 return 0;
1076}
1077
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001078static const struct seq_operations fib_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079 .start = fib_seq_start,
1080 .next = fib_seq_next,
1081 .stop = fib_seq_stop,
1082 .show = fib_seq_show,
1083};
1084
1085static int fib_seq_open(struct inode *inode, struct file *file)
1086{
Denis V. Lunev6e04d012008-01-10 03:26:50 -08001087 return seq_open_net(inode, file, &fib_seq_ops,
1088 sizeof(struct fib_iter_state));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089}
1090
Arjan van de Ven9a321442007-02-12 00:55:35 -08001091static const struct file_operations fib_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092 .owner = THIS_MODULE,
1093 .open = fib_seq_open,
1094 .read = seq_read,
1095 .llseek = seq_lseek,
Denis V. Lunev6e04d012008-01-10 03:26:50 -08001096 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097};
1098
Denis V. Lunev61a02652008-01-10 03:21:09 -08001099int __net_init fib_proc_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100{
Denis V. Lunev61a02652008-01-10 03:21:09 -08001101 if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102 return -ENOMEM;
1103 return 0;
1104}
1105
Denis V. Lunev61a02652008-01-10 03:21:09 -08001106void __net_exit fib_proc_exit(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107{
Denis V. Lunev61a02652008-01-10 03:21:09 -08001108 proc_net_remove(net, "route");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109}
1110#endif /* CONFIG_PROC_FS */