blob: ba87419716290204d4f1708ac978f74183c09804 [file] [log] [blame]
/*
 * net/sched/cls_rsvp.h	Template file for RSVPv[46] classifiers.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */
11
/*
   Compared to the general packet classification problem,
   RSVP needs only several relatively simple rules:

   * (dst, protocol) are always specified,
     so that we are able to hash them.
   * src may be exact, or may be wildcard, so that
     we can keep a hash table plus one wildcard entry.
   * source port (or flow label) is important only if src is given.

   IMPLEMENTATION.

   We use a two level hash table: The top level is keyed by
   destination address and protocol ID, every bucket contains a list
   of "rsvp sessions", identified by destination address, protocol and
   DPI(="Destination Port ID"): triple (key, mask, offset).

   Every bucket has a smaller hash table keyed by source address
   (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
   Every bucket is again a list of "RSVP flows", selected by
   source address and SPI(="Source Port ID" here rather than
   "security parameter index"): triple (key, mask, offset).


   NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
   and all fragmented packets go to the best-effort traffic class.


   NOTE 2. Two "port id"'s seem to be redundant, rfc2207 requires
   only one "Generalized Port Identifier". So that for classic
   ah, esp (and udp,tcp) both *pi should coincide or one of them
   should be wildcard.

   At first sight, this redundancy is just a waste of CPU
   resources. But DPI and SPI add the possibility to assign different
   priorities to GPIs. Look also at note 4 about tunnels below.


   NOTE 3. One complication is the case of tunneled packets.
   We implement it as follows: if the first lookup
   matches a special session with "tunnelhdr" value not zero,
   flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
   In this case, we pull tunnelhdr bytes and restart lookup
   with tunnel ID added to the list of keys. Simple and stupid 8)8)
   It's enough for PIMREG and IPIP.


   NOTE 4. Two GPIs make it possible to parse even GRE packets.
   F.e. DPI can select ETH_P_IP (and necessary flags to make
   tunnelhdr correct) in GRE protocol field and SPI matches
   GRE key. Is it not nice? 8)8)


   Well, as a result, despite its simplicity, we get a pretty
   powerful classification engine.  */
Linus Torvalds1da177e2005-04-16 15:20:36 -070068
69struct rsvp_head
70{
71 u32 tmap[256/32];
72 u32 hgenerator;
73 u8 tgenerator;
74 struct rsvp_session *ht[256];
75};
76
77struct rsvp_session
78{
79 struct rsvp_session *next;
80 u32 dst[RSVP_DST_LEN];
81 struct tc_rsvp_gpi dpi;
82 u8 protocol;
83 u8 tunnelid;
84 /* 16 (src,sport) hash slots, and one wildcard source slot */
85 struct rsvp_filter *ht[16+1];
86};
87
88
89struct rsvp_filter
90{
91 struct rsvp_filter *next;
92 u32 src[RSVP_DST_LEN];
93 struct tc_rsvp_gpi spi;
94 u8 tunnelhdr;
95
96 struct tcf_result res;
97 struct tcf_exts exts;
98
99 u32 handle;
100 struct rsvp_session *sess;
101};
102
103static __inline__ unsigned hash_dst(u32 *dst, u8 protocol, u8 tunnelid)
104{
105 unsigned h = dst[RSVP_DST_LEN-1];
106 h ^= h>>16;
107 h ^= h>>8;
108 return (h ^ protocol ^ tunnelid) & 0xFF;
109}
110
111static __inline__ unsigned hash_src(u32 *src)
112{
113 unsigned h = src[RSVP_DST_LEN-1];
114 h ^= h>>16;
115 h ^= h>>8;
116 h ^= h>>4;
117 return h & 0xF;
118}
119
120static struct tcf_ext_map rsvp_ext_map = {
121 .police = TCA_RSVP_POLICE,
122 .action = TCA_RSVP_ACT
123};
124
/*
 * Run the extensions of the current filter.  Must be expanded inside a
 * loop over filters, in a function that has `skb`, `f` and `res` in scope:
 *   - negative result: skip this filter (`continue` in the caller's loop);
 *   - positive result: return it from the enclosing function;
 *   - zero: fall through to the code after the macro.
 *
 * This is deliberately a bare brace block and not do { } while (0):
 * inside a do/while the `continue` would bind to the macro's own loop
 * instead of the caller's.
 */
#define RSVP_APPLY_RESULT()					\
{								\
	int exts_rc = tcf_exts_exec(skb, &f->exts, res);	\
	if (exts_rc < 0)					\
		continue;					\
	if (exts_rc > 0)					\
		return exts_rc;					\
}
133
134static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
135 struct tcf_result *res)
136{
137 struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
138 struct rsvp_session *s;
139 struct rsvp_filter *f;
140 unsigned h1, h2;
141 u32 *dst, *src;
142 u8 protocol;
143 u8 tunnelid = 0;
144 u8 *xprt;
145#if RSVP_DST_LEN == 4
146 struct ipv6hdr *nhptr = skb->nh.ipv6h;
147#else
148 struct iphdr *nhptr = skb->nh.iph;
149#endif
150
151restart:
152
153#if RSVP_DST_LEN == 4
154 src = &nhptr->saddr.s6_addr32[0];
155 dst = &nhptr->daddr.s6_addr32[0];
156 protocol = nhptr->nexthdr;
157 xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
158#else
159 src = &nhptr->saddr;
160 dst = &nhptr->daddr;
161 protocol = nhptr->protocol;
162 xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
163 if (nhptr->frag_off&__constant_htons(IP_MF|IP_OFFSET))
164 return -1;
165#endif
166
167 h1 = hash_dst(dst, protocol, tunnelid);
168 h2 = hash_src(src);
169
170 for (s = sht[h1]; s; s = s->next) {
171 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
172 protocol == s->protocol &&
173 !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key))
174#if RSVP_DST_LEN == 4
175 && dst[0] == s->dst[0]
176 && dst[1] == s->dst[1]
177 && dst[2] == s->dst[2]
178#endif
179 && tunnelid == s->tunnelid) {
180
181 for (f = s->ht[h2]; f; f = f->next) {
182 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
183 !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
184#if RSVP_DST_LEN == 4
185 && src[0] == f->src[0]
186 && src[1] == f->src[1]
187 && src[2] == f->src[2]
188#endif
189 ) {
190 *res = f->res;
191 RSVP_APPLY_RESULT();
192
193matched:
194 if (f->tunnelhdr == 0)
195 return 0;
196
197 tunnelid = f->res.classid;
198 nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
199 goto restart;
200 }
201 }
202
203 /* And wildcard bucket... */
204 for (f = s->ht[16]; f; f = f->next) {
205 *res = f->res;
206 RSVP_APPLY_RESULT();
207 goto matched;
208 }
209 return -1;
210 }
211 }
212 return -1;
213}
214
215static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
216{
217 struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
218 struct rsvp_session *s;
219 struct rsvp_filter *f;
220 unsigned h1 = handle&0xFF;
221 unsigned h2 = (handle>>8)&0xFF;
222
223 if (h2 > 16)
224 return 0;
225
226 for (s = sht[h1]; s; s = s->next) {
227 for (f = s->ht[h2]; f; f = f->next) {
228 if (f->handle == handle)
229 return (unsigned long)f;
230 }
231 }
232 return 0;
233}
234
/* Counterpart of rsvp_get(); intentionally does nothing. */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
	/* nothing to release */
}
238
239static int rsvp_init(struct tcf_proto *tp)
240{
241 struct rsvp_head *data;
242
243 data = kmalloc(sizeof(struct rsvp_head), GFP_KERNEL);
244 if (data) {
245 memset(data, 0, sizeof(struct rsvp_head));
246 tp->root = data;
247 return 0;
248 }
249 return -ENOBUFS;
250}
251
252static inline void
253rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
254{
255 tcf_unbind_filter(tp, &f->res);
256 tcf_exts_destroy(tp, &f->exts);
257 kfree(f);
258}
259
260static void rsvp_destroy(struct tcf_proto *tp)
261{
262 struct rsvp_head *data = xchg(&tp->root, NULL);
263 struct rsvp_session **sht;
264 int h1, h2;
265
266 if (data == NULL)
267 return;
268
269 sht = data->ht;
270
271 for (h1=0; h1<256; h1++) {
272 struct rsvp_session *s;
273
274 while ((s = sht[h1]) != NULL) {
275 sht[h1] = s->next;
276
277 for (h2=0; h2<=16; h2++) {
278 struct rsvp_filter *f;
279
280 while ((f = s->ht[h2]) != NULL) {
281 s->ht[h2] = f->next;
282 rsvp_delete_filter(tp, f);
283 }
284 }
285 kfree(s);
286 }
287 }
288 kfree(data);
289}
290
291static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
292{
293 struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
294 unsigned h = f->handle;
295 struct rsvp_session **sp;
296 struct rsvp_session *s = f->sess;
297 int i;
298
299 for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
300 if (*fp == f) {
301 tcf_tree_lock(tp);
302 *fp = f->next;
303 tcf_tree_unlock(tp);
304 rsvp_delete_filter(tp, f);
305
306 /* Strip tree */
307
308 for (i=0; i<=16; i++)
309 if (s->ht[i])
310 return 0;
311
312 /* OK, session has no flows */
313 for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
314 *sp; sp = &(*sp)->next) {
315 if (*sp == s) {
316 tcf_tree_lock(tp);
317 *sp = s->next;
318 tcf_tree_unlock(tp);
319
320 kfree(s);
321 return 0;
322 }
323 }
324
325 return 0;
326 }
327 }
328 return 0;
329}
330
331static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
332{
333 struct rsvp_head *data = tp->root;
334 int i = 0xFFFF;
335
336 while (i-- > 0) {
337 u32 h;
338 if ((data->hgenerator += 0x10000) == 0)
339 data->hgenerator = 0x10000;
340 h = data->hgenerator|salt;
341 if (rsvp_get(tp, h) == 0)
342 return h;
343 }
344 return 0;
345}
346
347static int tunnel_bts(struct rsvp_head *data)
348{
349 int n = data->tgenerator>>5;
350 u32 b = 1<<(data->tgenerator&0x1F);
351
352 if (data->tmap[n]&b)
353 return 0;
354 data->tmap[n] |= b;
355 return 1;
356}
357
358static void tunnel_recycle(struct rsvp_head *data)
359{
360 struct rsvp_session **sht = data->ht;
361 u32 tmap[256/32];
362 int h1, h2;
363
364 memset(tmap, 0, sizeof(tmap));
365
366 for (h1=0; h1<256; h1++) {
367 struct rsvp_session *s;
368 for (s = sht[h1]; s; s = s->next) {
369 for (h2=0; h2<=16; h2++) {
370 struct rsvp_filter *f;
371
372 for (f = s->ht[h2]; f; f = f->next) {
373 if (f->tunnelhdr == 0)
374 continue;
375 data->tgenerator = f->res.classid;
376 tunnel_bts(data);
377 }
378 }
379 }
380 }
381
382 memcpy(data->tmap, tmap, sizeof(tmap));
383}
384
385static u32 gen_tunnel(struct rsvp_head *data)
386{
387 int i, k;
388
389 for (k=0; k<2; k++) {
390 for (i=255; i>0; i--) {
391 if (++data->tgenerator == 0)
392 data->tgenerator = 1;
393 if (tunnel_bts(data))
394 return data->tgenerator;
395 }
396 tunnel_recycle(data);
397 }
398 return 0;
399}
400
401static int rsvp_change(struct tcf_proto *tp, unsigned long base,
402 u32 handle,
403 struct rtattr **tca,
404 unsigned long *arg)
405{
406 struct rsvp_head *data = tp->root;
407 struct rsvp_filter *f, **fp;
408 struct rsvp_session *s, **sp;
409 struct tc_rsvp_pinfo *pinfo = NULL;
410 struct rtattr *opt = tca[TCA_OPTIONS-1];
411 struct rtattr *tb[TCA_RSVP_MAX];
412 struct tcf_exts e;
413 unsigned h1, h2;
414 u32 *dst;
415 int err;
416
417 if (opt == NULL)
418 return handle ? -EINVAL : 0;
419
420 if (rtattr_parse_nested(tb, TCA_RSVP_MAX, opt) < 0)
421 return -EINVAL;
422
423 err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
424 if (err < 0)
425 return err;
426
427 if ((f = (struct rsvp_filter*)*arg) != NULL) {
428 /* Node exists: adjust only classid */
429
430 if (f->handle != handle && handle)
431 goto errout2;
432 if (tb[TCA_RSVP_CLASSID-1]) {
433 f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
434 tcf_bind_filter(tp, &f->res, base);
435 }
436
437 tcf_exts_change(tp, &f->exts, &e);
438 return 0;
439 }
440
441 /* Now more serious part... */
442 err = -EINVAL;
443 if (handle)
444 goto errout2;
445 if (tb[TCA_RSVP_DST-1] == NULL)
446 goto errout2;
447
448 err = -ENOBUFS;
449 f = kmalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
450 if (f == NULL)
451 goto errout2;
452
453 memset(f, 0, sizeof(*f));
454 h2 = 16;
455 if (tb[TCA_RSVP_SRC-1]) {
456 err = -EINVAL;
457 if (RTA_PAYLOAD(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
458 goto errout;
459 memcpy(f->src, RTA_DATA(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
460 h2 = hash_src(f->src);
461 }
462 if (tb[TCA_RSVP_PINFO-1]) {
463 err = -EINVAL;
464 if (RTA_PAYLOAD(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
465 goto errout;
466 pinfo = RTA_DATA(tb[TCA_RSVP_PINFO-1]);
467 f->spi = pinfo->spi;
468 f->tunnelhdr = pinfo->tunnelhdr;
469 }
470 if (tb[TCA_RSVP_CLASSID-1]) {
471 err = -EINVAL;
472 if (RTA_PAYLOAD(tb[TCA_RSVP_CLASSID-1]) != 4)
473 goto errout;
474 f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
475 }
476
477 err = -EINVAL;
478 if (RTA_PAYLOAD(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
479 goto errout;
480 dst = RTA_DATA(tb[TCA_RSVP_DST-1]);
481 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
482
483 err = -ENOMEM;
484 if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
485 goto errout;
486
487 if (f->tunnelhdr) {
488 err = -EINVAL;
489 if (f->res.classid > 255)
490 goto errout;
491
492 err = -ENOMEM;
493 if (f->res.classid == 0 &&
494 (f->res.classid = gen_tunnel(data)) == 0)
495 goto errout;
496 }
497
498 for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
499 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
500 pinfo && pinfo->protocol == s->protocol &&
501 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0
502#if RSVP_DST_LEN == 4
503 && dst[0] == s->dst[0]
504 && dst[1] == s->dst[1]
505 && dst[2] == s->dst[2]
506#endif
507 && pinfo->tunnelid == s->tunnelid) {
508
509insert:
510 /* OK, we found appropriate session */
511
512 fp = &s->ht[h2];
513
514 f->sess = s;
515 if (f->tunnelhdr == 0)
516 tcf_bind_filter(tp, &f->res, base);
517
518 tcf_exts_change(tp, &f->exts, &e);
519
520 for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
521 if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
522 break;
523 f->next = *fp;
524 wmb();
525 *fp = f;
526
527 *arg = (unsigned long)f;
528 return 0;
529 }
530 }
531
532 /* No session found. Create new one. */
533
534 err = -ENOBUFS;
535 s = kmalloc(sizeof(struct rsvp_session), GFP_KERNEL);
536 if (s == NULL)
537 goto errout;
538 memset(s, 0, sizeof(*s));
539 memcpy(s->dst, dst, sizeof(s->dst));
540
541 if (pinfo) {
542 s->dpi = pinfo->dpi;
543 s->protocol = pinfo->protocol;
544 s->tunnelid = pinfo->tunnelid;
545 }
546 for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
547 if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
548 break;
549 }
550 s->next = *sp;
551 wmb();
552 *sp = s;
553
554 goto insert;
555
556errout:
Jesper Juhla51482b2005-11-08 09:41:34 -0800557 kfree(f);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558errout2:
559 tcf_exts_destroy(tp, &e);
560 return err;
561}
562
563static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
564{
565 struct rsvp_head *head = tp->root;
566 unsigned h, h1;
567
568 if (arg->stop)
569 return;
570
571 for (h = 0; h < 256; h++) {
572 struct rsvp_session *s;
573
574 for (s = head->ht[h]; s; s = s->next) {
575 for (h1 = 0; h1 <= 16; h1++) {
576 struct rsvp_filter *f;
577
578 for (f = s->ht[h1]; f; f = f->next) {
579 if (arg->count < arg->skip) {
580 arg->count++;
581 continue;
582 }
583 if (arg->fn(tp, (unsigned long)f, arg) < 0) {
584 arg->stop = 1;
585 return;
586 }
587 arg->count++;
588 }
589 }
590 }
591 }
592}
593
594static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
595 struct sk_buff *skb, struct tcmsg *t)
596{
597 struct rsvp_filter *f = (struct rsvp_filter*)fh;
598 struct rsvp_session *s;
599 unsigned char *b = skb->tail;
600 struct rtattr *rta;
601 struct tc_rsvp_pinfo pinfo;
602
603 if (f == NULL)
604 return skb->len;
605 s = f->sess;
606
607 t->tcm_handle = f->handle;
608
609
610 rta = (struct rtattr*)b;
611 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
612
613 RTA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
614 pinfo.dpi = s->dpi;
615 pinfo.spi = f->spi;
616 pinfo.protocol = s->protocol;
617 pinfo.tunnelid = s->tunnelid;
618 pinfo.tunnelhdr = f->tunnelhdr;
Patrick McHardy8a470772005-06-28 12:56:45 -0700619 pinfo.pad = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 RTA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
621 if (f->res.classid)
622 RTA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid);
623 if (((f->handle>>8)&0xFF) != 16)
624 RTA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
625
626 if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
627 goto rtattr_failure;
628
629 rta->rta_len = skb->tail - b;
630
631 if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
632 goto rtattr_failure;
633 return skb->len;
634
635rtattr_failure:
636 skb_trim(skb, b - skb->data);
637 return -1;
638}
639
640static struct tcf_proto_ops RSVP_OPS = {
641 .next = NULL,
642 .kind = RSVP_ID,
643 .classify = rsvp_classify,
644 .init = rsvp_init,
645 .destroy = rsvp_destroy,
646 .get = rsvp_get,
647 .put = rsvp_put,
648 .change = rsvp_change,
649 .delete = rsvp_delete,
650 .walk = rsvp_walk,
651 .dump = rsvp_dump,
652 .owner = THIS_MODULE,
653};
654
655static int __init init_rsvp(void)
656{
657 return register_tcf_proto_ops(&RSVP_OPS);
658}
659
660static void __exit exit_rsvp(void)
661{
662 unregister_tcf_proto_ops(&RSVP_OPS);
663}
664
665module_init(init_rsvp)
666module_exit(exit_rsvp)