/*
 * net/sched/sch_netem.c	Network emulator
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>

#include <net/pkt_sched.h>

#define VERSION "1.2"

/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------

	 This started out as a simple way to delay outgoing packets to
	 test TCP but has grown to include most of the functionality
	 of a full-blown network emulator like NISTnet. It can delay
	 packets and add random jitter (and correlation). The random
	 distribution can also be loaded from a table, to provide
	 normal, Pareto, or experimental curves. Packet loss,
	 duplication, and reordering can also be emulated.

	 This qdisc does not do classification; that can be handled by
	 layering other disciplines.  It does not need to do bandwidth
	 control either, since that can be handled by using a token
	 bucket or other rate control.

	 The simulator is limited by the Linux timer resolution
	 and will create packet bursts on the HZ boundary (1ms).
*/
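
/* For illustration (added commentary, not part of the original source):
 * a typical way to exercise this qdisc from user space with the
 * iproute2 "tc" tool is something like:
 *
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms 25%
 *
 * which requests a 100ms base delay with +/-10ms jitter, 25% correlated
 * with the previous packet's delay.  The device name and numbers are
 * example values only.
 */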

struct netem_sched_data {
	struct Qdisc	*qdisc;
	struct timer_list timer;

	u32 latency;
	u32 loss;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 jitter;
	u32 duplicate;
	u32 reorder;
	u32 corrupt;

	struct crndstate {
		unsigned long last;
		unsigned long rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	struct disttable {
		u32  size;
		s16 table[0];
	} *delay_dist;
};

/* Time stamp put into socket buffer control block */
struct netem_skb_cb {
	psched_time_t	time_to_send;
};
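
/* Note (added commentary): skb->cb is scratch space that belongs to
 * whichever layer currently owns the packet, so netem is free to stash
 * the computed departure time there for as long as the skb sits in the
 * inner time-sorted FIFO below.
 */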

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = net_random();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static unsigned long get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return net_random();

	value = net_random();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}
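
/* Worked example (added commentary): the expression above is a 32.32
 * fixed-point convex combination
 *
 *	answer = ((2^32 - rho) * U + rho * last) / 2^32
 *
 * where U is a fresh uniform 32-bit sample.  With rho = 0x80000000
 * (about 50% correlation), each output lands roughly halfway between
 * the new random value and the previous output, so consecutive values
 * drift rather than jump.
 */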

/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static long tabledist(unsigned long mu, long sigma,
		      struct crndstate *state, const struct disttable *dist)
{
	long t, x;
	unsigned long rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
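
/* Worked example (added commentary): sigma is split into quotient and
 * remainder by NETEM_DIST_SCALE so the sigma * t product stays within
 * range on 32-bit longs.  Together the two halves compute
 *
 *	sigma * t / NETEM_DIST_SCALE + mu
 *
 * with rounding: e.g. with NETEM_DIST_SCALE = 8192, sigma = 10000 and a
 * table entry t = 4096 (i.e. +0.5 standard deviations), the result is
 * 10000 * 0.5 + mu = mu + 5000.
 */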

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 *	NET_XMIT_DROP: queue length didn't change.
 *	NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
	struct sk_buff *skb2;
	int ret;
	int count = 1;

	pr_debug("netem_enqueue skb=%p\n", skb);

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Random packet drop 0 => none, ~0 => all */
	if (q->loss && q->loss >= get_crandom(&q->loss_cor))
		--count;

	if (count == 0) {
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_DROP;
	}

	/*
	 * If we need to duplicate the packet, then re-insert at the top
	 * of the qdisc tree, since the parent queuer expects that only
	 * one skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = sch->dev->qdisc;
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
		q->duplicate = 0;

		rootq->enqueue(skb2, rootq);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Make a copy if needed since we are modifying the data.
	 * If the packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (!(skb = skb_unshare(skb, GFP_ATOMIC))
		    || (skb->ip_summed == CHECKSUM_HW
			&& skb_checksum_help(skb, 0))) {
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}

		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
	}

	if (q->gap == 0			/* not doing reordering */
	    || q->counter < q->gap	/* inside last reordering gap */
	    || q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		PSCHED_GET_TIME(now);
		PSCHED_TADD2(now, delay, cb->time_to_send);
		++q->counter;
		ret = q->qdisc->enqueue(skb, q->qdisc);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		PSCHED_GET_TIME(cb->time_to_send);
		q->counter = 0;
		ret = q->qdisc->ops->requeue(skb, q->qdisc);
	}

	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->q.qlen++;
		sch->bstats.bytes += skb->len;
		sch->bstats.packets++;
	} else
		sch->qstats.drops++;

	pr_debug("netem: enqueue ret %d\n", ret);
	return ret;
}

/* Requeue packets but don't change time stamp */
static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
		sch->q.qlen++;
		sch->qstats.requeues++;
	}

	return ret;
}

static unsigned int netem_drop(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned int len;

	if ((len = q->qdisc->ops->drop(q->qdisc)) != 0) {
		sch->q.qlen--;
		sch->qstats.drops++;
	}
	return len;
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = q->qdisc->dequeue(q->qdisc);
	if (skb) {
		const struct netem_skb_cb *cb
			= (const struct netem_skb_cb *)skb->cb;
		psched_time_t now;

		/* is it time to send this packet? */
		PSCHED_GET_TIME(now);

		if (PSCHED_TLESS(cb->time_to_send, now)) {
			pr_debug("netem_dequeue: return skb=%p\n", skb);
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		} else {
			psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now);

			if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
				sch->qstats.drops++;

				/* After this, qlen is confused */
				printk(KERN_ERR "netem: queue discipline %s could not requeue\n",
				       q->qdisc->ops->id);

				sch->q.qlen--;
			}

			mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay));
			sch->flags |= TCQ_F_THROTTLED;
		}
	}

	return NULL;
}

static void netem_watchdog(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;

	pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen);
	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
}
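
/* Note (added commentary): dequeue and the watchdog cooperate to pace
 * packets.  When the head packet's departure time is still in the
 * future, dequeue requeues it, sets TCQ_F_THROTTLED so the upper layer
 * stops polling, and arms the timer; when the timer fires, the watchdog
 * clears the flag and reschedules the device so dequeue runs again.
 */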

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	sch->flags &= ~TCQ_F_THROTTLED;
	del_timer_sync(&q->timer);
}

/* Pass size change message down to embedded FIFO */
static int set_fifo_limit(struct Qdisc *q, int limit)
{
	struct rtattr *rta;
	int ret = -ENOMEM;

	/* Hack to avoid sending change message to non-FIFO */
	if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
		return 0;

	rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
	if (rta) {
		rta->rta_type = RTM_NEWQDISC;
		rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
		((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;

		ret = q->ops->change(q, rta);
		kfree(rta);
	}
	return ret;
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16);
	const __s16 *data = RTA_DATA(attr);
	struct disttable *d;
	int i;

	if (n > 65536)
		return -EINVAL;

	d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	spin_lock_bh(&sch->dev->queue_lock);
	d = xchg(&q->delay_dist, d);
	spin_unlock_bh(&sch->dev->queue_lock);

	kfree(d);
	return 0;
}
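
/* Note (added commentary): in practice the table comes from user space;
 * the iproute2 distribution ships precomputed tables (e.g. "normal" and
 * "pareto") whose entries are inverse-CDF samples scaled so that
 * tabledist() above can turn a uniform random index into a value with
 * the desired distribution.
 */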

static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corr *c = RTA_DATA(attr);

	if (RTA_PAYLOAD(attr) != sizeof(*c))
		return -EINVAL;

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
	return 0;
}

static int get_reorder(struct Qdisc *sch, const struct rtattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_reorder *r = RTA_DATA(attr);

	if (RTA_PAYLOAD(attr) != sizeof(*r))
		return -EINVAL;

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
	return 0;
}

static int get_corrupt(struct Qdisc *sch, const struct rtattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corrupt *r = RTA_DATA(attr);

	if (RTA_PAYLOAD(attr) != sizeof(*r))
		return -EINVAL;

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
	return 0;
}

/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct rtattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
		return -EINVAL;

	qopt = RTA_DATA(opt);
	ret = set_fifo_limit(q->qdisc, qopt->limit);
	if (ret) {
		pr_debug("netem: can't set fifo limit\n");
		return ret;
	}

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* for compatibility with earlier versions:
	 * if gap is set, need to assume 100% probability
	 */
	q->reorder = ~0;

	/* Handle nested options after initial queue options.
	 * Should have put all options in nested format but too late now.
	 */
	if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
		struct rtattr *tb[TCA_NETEM_MAX];
		if (rtattr_parse(tb, TCA_NETEM_MAX,
				 RTA_DATA(opt) + sizeof(*qopt),
				 RTA_PAYLOAD(opt) - sizeof(*qopt)))
			return -EINVAL;

		if (tb[TCA_NETEM_CORR-1]) {
			ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
			if (ret)
				return ret;
		}

		if (tb[TCA_NETEM_DELAY_DIST-1]) {
			ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
			if (ret)
				return ret;
		}

		if (tb[TCA_NETEM_REORDER-1]) {
			ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]);
			if (ret)
				return ret;
		}

		if (tb[TCA_NETEM_CORRUPT-1]) {
			ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT-1]);
			if (ret)
				return ret;
		}
	}

	return 0;
}

/*
 * Special case version of FIFO queue for use by netem.
 * It queues packets in order, based on the timestamps in the skbs.
 */
struct fifo_sched_data {
	u32 limit;
};

static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct sk_buff_head *list = &sch->q;
	const struct netem_skb_cb *ncb
		= (const struct netem_skb_cb *)nskb->cb;
	struct sk_buff *skb;

	if (likely(skb_queue_len(list) < q->limit)) {
		skb_queue_reverse_walk(list, skb) {
			const struct netem_skb_cb *cb
				= (const struct netem_skb_cb *)skb->cb;

			if (!PSCHED_TLESS(ncb->time_to_send, cb->time_to_send))
				break;
		}

		__skb_queue_after(list, skb, nskb);

		sch->qstats.backlog += nskb->len;
		sch->bstats.bytes += nskb->len;
		sch->bstats.packets++;

		return NET_XMIT_SUCCESS;
	}

	return qdisc_drop(nskb, sch);
}
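
/* Note (added commentary): the reverse walk above is an O(n) insertion
 * sort step.  Most packets carry monotonically increasing departure
 * times and land at the tail immediately; only packets that netem
 * deliberately reordered, or that drew a smaller jitter, walk further
 * forward.  If the walk exhausts the list, skb ends up pointing at the
 * list head itself and __skb_queue_after() inserts at the front.
 */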

static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct fifo_sched_data *q = qdisc_priv(sch);

	if (opt) {
		struct tc_fifo_qopt *ctl = RTA_DATA(opt);
		if (RTA_PAYLOAD(opt) < sizeof(*ctl))
			return -EINVAL;

		q->limit = ctl->limit;
	} else
		q->limit = max_t(u32, sch->dev->tx_queue_len, 1);

	return 0;
}

static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct tc_fifo_qopt opt = { .limit = q->limit };

	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	return -1;
}

static struct Qdisc_ops tfifo_qdisc_ops = {
	.id		=	"tfifo",
	.priv_size	=	sizeof(struct fifo_sched_data),
	.enqueue	=	tfifo_enqueue,
	.dequeue	=	qdisc_dequeue_head,
	.requeue	=	qdisc_requeue,
	.drop		=	qdisc_queue_drop,
	.init		=	tfifo_init,
	.reset		=	qdisc_reset_queue,
	.change		=	tfifo_init,
	.dump		=	tfifo_dump,
};
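
/* Note (added commentary): only enqueue is special here; dequeue,
 * requeue, drop and reset all reuse the generic FIFO helpers, since
 * once packets are time-sorted on insert, a plain head dequeue yields
 * them in departure order.
 */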

static int netem_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	init_timer(&q->timer);
	q->timer.function = netem_watchdog;
	q->timer.data = (unsigned long) sch;

	q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops);
	if (!q->qdisc) {
		pr_debug("netem: qdisc create failed\n");
		return -ENOMEM;
	}

	ret = netem_change(sch, opt);
	if (ret) {
		pr_debug("netem: change failed\n");
		qdisc_destroy(q->qdisc);
	}
	return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->timer);
	qdisc_destroy(q->qdisc);
	kfree(q->delay_dist);
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb->tail;
	struct rtattr *rta = (struct rtattr *) b;
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);

	rta->rta_len = skb->tail - b;

	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1)	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = xchg(&q->qdisc, new);
	qdisc_reset(*old);
	sch->q.qlen = 0;
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
			    struct rtattr **tca, unsigned long *arg)
{
	return -ENOSYS;
}

static int netem_delete(struct Qdisc *sch, unsigned long arg)
{
	return -ENOSYS;
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	return NULL;
}

static struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.get		=	netem_get,
	.put		=	netem_put,
	.change		=	netem_change_class,
	.delete		=	netem_delete,
	.walk		=	netem_walk,
	.tcf_chain	=	netem_find_tcf,
	.dump		=	netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.requeue	=	netem_requeue,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};


static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}

static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}

module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");