/*
 * net/sched/sch_netem.c        Network emulator
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License.
 *
 *              Many of the algorithms and ideas for this came from
 *              NIST Net which is not copyrighted.
 *
 * Authors:     Stephen Hemminger <shemminger@osdl.org>
 *              Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>

#define VERSION "1.2"

/*      Network Emulation Queuing algorithm.
        ====================================

        Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
                     Network Emulation Tool"
                 [2] Luigi Rizzo, DummyNet for FreeBSD

        ----------------------------------------------------------------

        This started out as a simple way to delay outgoing packets to
        test TCP but has grown to include most of the functionality
        of a full-blown network emulator like NISTnet.  It can delay
        packets and add random jitter (and correlation).  The random
        distribution can also be loaded from a table to provide
        normal, Pareto, or experimental curves.  Packet loss,
        duplication, and reordering can also be emulated.

        This qdisc does not do classification; that can be handled by
        layering other disciplines.  It does not need to do bandwidth
        control either, since that can be handled by using token
        bucket or other rate control.

        The simulator is limited by the Linux timer resolution
        and will create packet bursts on the HZ boundary (1ms).
*/
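
/* A quick usage illustration (iproute2 syntax quoted from memory; exact
 * option spelling may differ between tc versions):
 *
 *      tc qdisc add dev eth0 root netem delay 100ms 10ms 25% loss 0.1%
 *
 * delays every packet by 100ms with +/- 10ms of jitter, 25% correlated with
 * the previous delay, and drops roughly one packet in a thousand.
 */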

struct netem_sched_data {
        struct Qdisc    *qdisc;
        struct qdisc_watchdog watchdog;

        psched_tdiff_t latency;
        psched_tdiff_t jitter;

        u32 loss;
        u32 limit;
        u32 counter;
        u32 gap;
        u32 duplicate;
        u32 reorder;
        u32 corrupt;

        struct crndstate {
                u32 last;
                u32 rho;
        } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

        struct disttable {
                u32 size;
                s16 table[0];
        } *delay_dist;
};

/* Time stamp put into socket buffer control block */
struct netem_skb_cb {
        psched_time_t time_to_send;
};

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
        state->rho = rho;
        state->last = net_random();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static u32 get_crandom(struct crndstate *state)
{
        u64 value, rho;
        unsigned long answer;

        if (state->rho == 0)    /* no correlation */
                return net_random();

        value = net_random();
        rho = (u64)state->rho + 1;
        answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
        state->last = answer;
        return answer;
}
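
/* Informal sketch of the recurrence above (a reading of the code, not from
 * the original sources): with rho scaled to the 32-bit range, the next
 * sample is approximately
 *
 *      answer = value * (1 - rho/2^32) + last * (rho/2^32)
 *
 * i.e. a first-order autoregressive blend of a fresh uniform draw and the
 * previous output.  For example, rho = 0x80000000 (about 50% correlation)
 * weights the new draw and the previous value roughly equally.
 */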

/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
                                struct crndstate *state,
                                const struct disttable *dist)
{
        psched_tdiff_t x;
        long t;
        u32 rnd;

        if (sigma == 0)
                return mu;

        rnd = get_crandom(state);

        /* default uniform distribution */
        if (dist == NULL)
                return (rnd % (2*sigma)) - sigma + mu;

        t = dist->table[rnd % dist->size];
        x = (sigma % NETEM_DIST_SCALE) * t;
        if (x >= 0)
                x += NETEM_DIST_SCALE/2;
        else
                x -= NETEM_DIST_SCALE/2;

        return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
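
/* Worked example of the table path above (illustrative only): the table
 * entry t is expressed in units of NETEM_DIST_SCALE, and the two terms of
 * the return value are just (t * sigma) / NETEM_DIST_SCALE, split so that
 * the multiplication cannot overflow for large sigma.  So an entry of
 * +NETEM_DIST_SCALE adds one sigma to mu, -2*NETEM_DIST_SCALE subtracts
 * two sigma, and so on.
 */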

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 *      NET_XMIT_DROP: queue length didn't change.
 *      NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        /* We don't fill cb now as skb_unshare() may invalidate it */
        struct netem_skb_cb *cb;
        struct sk_buff *skb2;
        int ret;
        int count = 1;

        pr_debug("netem_enqueue skb=%p\n", skb);

        /* Random duplication */
        if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
                ++count;

        /* Random packet drop 0 => none, ~0 => all */
        if (q->loss && q->loss >= get_crandom(&q->loss_cor))
                --count;

        if (count == 0) {
                sch->qstats.drops++;
                kfree_skb(skb);
                return NET_XMIT_BYPASS;
        }

        skb_orphan(skb);

        /*
         * If we need to duplicate packet, then re-insert at top of the
         * qdisc tree, since parent queuer expects that only one
         * skb will be queued.
         */
        if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
                struct Qdisc *rootq = sch->dev->qdisc;
                u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
                q->duplicate = 0;

                rootq->enqueue(skb2, rootq);
                q->duplicate = dupsave;
        }

        /*
         * Randomized packet corruption.
         * Make a copy if needed since we are modifying the data.
         * If the packet is going to be hardware checksummed, then
         * do it now in software before we mangle it.
         */
        if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
                if (!(skb = skb_unshare(skb, GFP_ATOMIC))
                    || (skb->ip_summed == CHECKSUM_PARTIAL
                        && skb_checksum_help(skb))) {
                        sch->qstats.drops++;
                        return NET_XMIT_DROP;
                }

                skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
        }

        cb = (struct netem_skb_cb *)skb->cb;
        if (q->gap == 0                 /* not doing reordering */
            || q->counter < q->gap      /* inside last reordering gap */
            || q->reorder < get_crandom(&q->reorder_cor)) {
                psched_time_t now;
                psched_tdiff_t delay;

                delay = tabledist(q->latency, q->jitter,
                                  &q->delay_cor, q->delay_dist);

                PSCHED_GET_TIME(now);
                PSCHED_TADD2(now, delay, cb->time_to_send);
                ++q->counter;
                ret = q->qdisc->enqueue(skb, q->qdisc);
        } else {
                /*
                 * Do re-ordering by putting one out of N packets at the front
                 * of the queue.
                 */
                PSCHED_GET_TIME(cb->time_to_send);
                q->counter = 0;
                ret = q->qdisc->ops->requeue(skb, q->qdisc);
        }

        if (likely(ret == NET_XMIT_SUCCESS)) {
                sch->q.qlen++;
                sch->bstats.bytes += skb->len;
                sch->bstats.packets++;
        } else
                sch->qstats.drops++;

        pr_debug("netem: enqueue ret %d\n", ret);
        return ret;
}

/* Requeue packets but don't change time stamp */
static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        int ret;

        if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
                sch->q.qlen++;
                sch->qstats.requeues++;
        }

        return ret;
}

static unsigned int netem_drop(struct Qdisc* sch)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        unsigned int len = 0;

        if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
                sch->q.qlen--;
                sch->qstats.drops++;
        }
        return len;
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;

        skb = q->qdisc->dequeue(q->qdisc);
        if (skb) {
                const struct netem_skb_cb *cb
                        = (const struct netem_skb_cb *)skb->cb;
                psched_time_t now;

                /* is it time to send this packet yet? */
                PSCHED_GET_TIME(now);

                if (PSCHED_TLESS(cb->time_to_send, now)) {
                        pr_debug("netem_dequeue: return skb=%p\n", skb);
                        sch->q.qlen--;
                        sch->flags &= ~TCQ_F_THROTTLED;
                        return skb;
                } else {
                        qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);

                        if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
                                qdisc_tree_decrease_qlen(q->qdisc, 1);
                                sch->qstats.drops++;
                                printk(KERN_ERR "netem: queue discipline %s could not requeue\n",
                                       q->qdisc->ops->id);
                        }
                }
        }

        return NULL;
}

static void netem_reset(struct Qdisc *sch)
{
        struct netem_sched_data *q = qdisc_priv(sch);

        qdisc_reset(q->qdisc);
        sch->q.qlen = 0;
        qdisc_watchdog_cancel(&q->watchdog);
}

/* Pass size change message down to embedded FIFO */
static int set_fifo_limit(struct Qdisc *q, int limit)
{
        struct rtattr *rta;
        int ret = -ENOMEM;

        /* Hack to avoid sending change message to non-FIFO */
        if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
                return 0;

        rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
        if (rta) {
                rta->rta_type = RTM_NEWQDISC;
                rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
                ((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;

                ret = q->ops->change(q, rta);
                kfree(rta);
        }
        return ret;
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16);
        const __s16 *data = RTA_DATA(attr);
        struct disttable *d;
        int i;

        if (n > 65536)
                return -EINVAL;

        d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
        if (!d)
                return -ENOMEM;

        d->size = n;
        for (i = 0; i < n; i++)
                d->table[i] = data[i];

        spin_lock_bh(&sch->dev->queue_lock);
        d = xchg(&q->delay_dist, d);
        spin_unlock_bh(&sch->dev->queue_lock);

        kfree(d);
        return 0;
}
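
/* Background note (a summary added for readability, not from the original
 * sources): the delay distribution tables are built in user space, typically
 * by the iproute2 tooling that ships the usual normal/pareto/paretonormal
 * curves mentioned in the header comment, and are handed down here via the
 * TCA_NETEM_DELAY_DIST attribute.  Each 16-bit entry is in NETEM_DIST_SCALE
 * units of sigma, as consumed by tabledist().
 */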

static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        const struct tc_netem_corr *c = RTA_DATA(attr);

        if (RTA_PAYLOAD(attr) != sizeof(*c))
                return -EINVAL;

        init_crandom(&q->delay_cor, c->delay_corr);
        init_crandom(&q->loss_cor, c->loss_corr);
        init_crandom(&q->dup_cor, c->dup_corr);
        return 0;
}

static int get_reorder(struct Qdisc *sch, const struct rtattr *attr)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        const struct tc_netem_reorder *r = RTA_DATA(attr);

        if (RTA_PAYLOAD(attr) != sizeof(*r))
                return -EINVAL;

        q->reorder = r->probability;
        init_crandom(&q->reorder_cor, r->correlation);
        return 0;
}

static int get_corrupt(struct Qdisc *sch, const struct rtattr *attr)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        const struct tc_netem_corrupt *r = RTA_DATA(attr);

        if (RTA_PAYLOAD(attr) != sizeof(*r))
                return -EINVAL;

        q->corrupt = r->probability;
        init_crandom(&q->corrupt_cor, r->correlation);
        return 0;
}

/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct rtattr *opt)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        struct tc_netem_qopt *qopt;
        int ret;

        if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
                return -EINVAL;

        qopt = RTA_DATA(opt);
        ret = set_fifo_limit(q->qdisc, qopt->limit);
        if (ret) {
                pr_debug("netem: can't set fifo limit\n");
                return ret;
        }

        q->latency = qopt->latency;
        q->jitter = qopt->jitter;
        q->limit = qopt->limit;
        q->gap = qopt->gap;
        q->counter = 0;
        q->loss = qopt->loss;
        q->duplicate = qopt->duplicate;

        /* for compatibility with earlier versions.
         * if gap is set, need to assume 100% probability
         */
        if (q->gap)
                q->reorder = ~0;

        /* Handle nested options after initial queue options.
         * Should have put all options in nested format but too late now.
         */
        if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
                struct rtattr *tb[TCA_NETEM_MAX];
                if (rtattr_parse(tb, TCA_NETEM_MAX,
                                 RTA_DATA(opt) + sizeof(*qopt),
                                 RTA_PAYLOAD(opt) - sizeof(*qopt)))
                        return -EINVAL;

                if (tb[TCA_NETEM_CORR-1]) {
                        ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
                        if (ret)
                                return ret;
                }

                if (tb[TCA_NETEM_DELAY_DIST-1]) {
                        ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
                        if (ret)
                                return ret;
                }

                if (tb[TCA_NETEM_REORDER-1]) {
                        ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]);
                        if (ret)
                                return ret;
                }

                if (tb[TCA_NETEM_CORRUPT-1]) {
                        ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT-1]);
                        if (ret)
                                return ret;
                }
        }

        return 0;
}
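
/* For reference, the option layout handled above is (informally):
 *
 *      struct tc_netem_qopt            always first, fixed size
 *      TCA_NETEM_CORR                  optional struct tc_netem_corr
 *      TCA_NETEM_DELAY_DIST            optional array of __s16
 *      TCA_NETEM_REORDER               optional struct tc_netem_reorder
 *      TCA_NETEM_CORRUPT               optional struct tc_netem_corrupt
 *
 * This summary is added here for readability and is not part of the
 * original sources.
 */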

/*
 * Special case version of FIFO queue for use by netem.
 * It queues in order based on the timestamps in the skbs' control blocks.
 */
struct fifo_sched_data {
        u32 limit;
        psched_time_t oldest;
};

static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
        struct fifo_sched_data *q = qdisc_priv(sch);
        struct sk_buff_head *list = &sch->q;
        psched_time_t tnext = ((struct netem_skb_cb *)nskb->cb)->time_to_send;
        struct sk_buff *skb;

        if (likely(skb_queue_len(list) < q->limit)) {
                /* Optimize for add at tail */
                if (likely(skb_queue_empty(list) || !PSCHED_TLESS(tnext, q->oldest))) {
                        q->oldest = tnext;
                        return qdisc_enqueue_tail(nskb, sch);
                }

                skb_queue_reverse_walk(list, skb) {
                        const struct netem_skb_cb *cb
                                = (const struct netem_skb_cb *)skb->cb;

                        if (!PSCHED_TLESS(tnext, cb->time_to_send))
                                break;
                }

                __skb_queue_after(list, skb, nskb);

                sch->qstats.backlog += nskb->len;
                sch->bstats.bytes += nskb->len;
                sch->bstats.packets++;

                return NET_XMIT_SUCCESS;
        }

        return qdisc_reshape_fail(nskb, sch);
}
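
/* A note on the walk above (an informal reading, not from the original
 * sources): the queue is kept sorted by time_to_send, so a new skb is
 * scanned in from the tail and inserted after the first packet whose send
 * time is not later than its own.  With mostly in-order timestamps the
 * common case is the tail-append fast path guarded by q->oldest.
 */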

static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
{
        struct fifo_sched_data *q = qdisc_priv(sch);

        if (opt) {
                struct tc_fifo_qopt *ctl = RTA_DATA(opt);
                if (RTA_PAYLOAD(opt) < sizeof(*ctl))
                        return -EINVAL;

                q->limit = ctl->limit;
        } else
                q->limit = max_t(u32, sch->dev->tx_queue_len, 1);

        PSCHED_SET_PASTPERFECT(q->oldest);
        return 0;
}

static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct fifo_sched_data *q = qdisc_priv(sch);
        struct tc_fifo_qopt opt = { .limit = q->limit };

        RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
        return skb->len;

rtattr_failure:
        return -1;
}

static struct Qdisc_ops tfifo_qdisc_ops = {
        .id             =       "tfifo",
        .priv_size      =       sizeof(struct fifo_sched_data),
        .enqueue        =       tfifo_enqueue,
        .dequeue        =       qdisc_dequeue_head,
        .requeue        =       qdisc_requeue,
        .drop           =       qdisc_queue_drop,
        .init           =       tfifo_init,
        .reset          =       qdisc_reset_queue,
        .change         =       tfifo_init,
        .dump           =       tfifo_dump,
};

static int netem_init(struct Qdisc *sch, struct rtattr *opt)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        int ret;

        if (!opt)
                return -EINVAL;

        qdisc_watchdog_init(&q->watchdog, sch);

        q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops,
                                     TC_H_MAKE(sch->handle, 1));
        if (!q->qdisc) {
                pr_debug("netem: qdisc create failed\n");
                return -ENOMEM;
        }

        ret = netem_change(sch, opt);
        if (ret) {
                pr_debug("netem: change failed\n");
                qdisc_destroy(q->qdisc);
        }
        return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
        struct netem_sched_data *q = qdisc_priv(sch);

        qdisc_watchdog_cancel(&q->watchdog);
        qdisc_destroy(q->qdisc);
        kfree(q->delay_dist);
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        const struct netem_sched_data *q = qdisc_priv(sch);
        unsigned char *b = skb_tail_pointer(skb);
        struct rtattr *rta = (struct rtattr *) b;
        struct tc_netem_qopt qopt;
        struct tc_netem_corr cor;
        struct tc_netem_reorder reorder;
        struct tc_netem_corrupt corrupt;

        qopt.latency = q->latency;
        qopt.jitter = q->jitter;
        qopt.limit = q->limit;
        qopt.loss = q->loss;
        qopt.gap = q->gap;
        qopt.duplicate = q->duplicate;
        RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

        cor.delay_corr = q->delay_cor.rho;
        cor.loss_corr = q->loss_cor.rho;
        cor.dup_corr = q->dup_cor.rho;
        RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

        reorder.probability = q->reorder;
        reorder.correlation = q->reorder_cor.rho;
        RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);

        corrupt.probability = q->corrupt;
        corrupt.correlation = q->corrupt_cor.rho;
        RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);

        rta->rta_len = skb_tail_pointer(skb) - b;

        return skb->len;

rtattr_failure:
        nlmsg_trim(skb, b);
        return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
                            struct sk_buff *skb, struct tcmsg *tcm)
{
        struct netem_sched_data *q = qdisc_priv(sch);

        if (cl != 1)    /* only one class */
                return -ENOENT;

        tcm->tcm_handle |= TC_H_MIN(1);
        tcm->tcm_info = q->qdisc->handle;

        return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                       struct Qdisc **old)
{
        struct netem_sched_data *q = qdisc_priv(sch);

        if (new == NULL)
                new = &noop_qdisc;

        sch_tree_lock(sch);
        *old = xchg(&q->qdisc, new);
        qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
        qdisc_reset(*old);
        sch_tree_unlock(sch);

        return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct netem_sched_data *q = qdisc_priv(sch);
        return q->qdisc;
}

static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
        return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
                              struct rtattr **tca, unsigned long *arg)
{
        return -ENOSYS;
}

static int netem_delete(struct Qdisc *sch, unsigned long arg)
{
        return -ENOSYS;
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
        if (!walker->stop) {
                if (walker->count >= walker->skip)
                        if (walker->fn(sch, 1, walker) < 0) {
                                walker->stop = 1;
                                return;
                        }
                walker->count++;
        }
}

static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
{
        return NULL;
}

static struct Qdisc_class_ops netem_class_ops = {
        .graft          =       netem_graft,
        .leaf           =       netem_leaf,
        .get            =       netem_get,
        .put            =       netem_put,
        .change         =       netem_change_class,
        .delete         =       netem_delete,
        .walk           =       netem_walk,
        .tcf_chain      =       netem_find_tcf,
        .dump           =       netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops = {
        .id             =       "netem",
        .cl_ops         =       &netem_class_ops,
        .priv_size      =       sizeof(struct netem_sched_data),
        .enqueue        =       netem_enqueue,
        .dequeue        =       netem_dequeue,
        .requeue        =       netem_requeue,
        .drop           =       netem_drop,
        .init           =       netem_init,
        .reset          =       netem_reset,
        .destroy        =       netem_destroy,
        .change         =       netem_change,
        .dump           =       netem_dump,
        .owner          =       THIS_MODULE,
};


static int __init netem_module_init(void)
{
        pr_info("netem: version " VERSION "\n");
        return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
        unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");