| Eric Dumazet | e13e02a | 2011-02-23 10:56:17 +0000 | [diff] [blame] | 1 | /* | 
 | 2 |  * net/sched/sch_sfb.c	  Stochastic Fair Blue | 
 | 3 |  * | 
 | 4 |  * Copyright (c) 2008-2011 Juliusz Chroboczek <jch@pps.jussieu.fr> | 
 | 5 |  * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com> | 
 | 6 |  * | 
 | 7 |  * This program is free software; you can redistribute it and/or | 
 | 8 |  * modify it under the terms of the GNU General Public License | 
 | 9 |  * version 2 as published by the Free Software Foundation. | 
 | 10 |  * | 
 | 11 |  * W. Feng, D. Kandlur, D. Saha, K. Shin. Blue: | 
 | 12 |  * A New Class of Active Queue Management Algorithms. | 
 | 13 |  * U. Michigan CSE-TR-387-99, April 1999. | 
 | 14 |  * | 
 | 15 |  * http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf | 
 | 16 |  * | 
 | 17 |  */ | 
 | 18 |  | 
 | 19 | #include <linux/module.h> | 
 | 20 | #include <linux/types.h> | 
 | 21 | #include <linux/kernel.h> | 
 | 22 | #include <linux/errno.h> | 
 | 23 | #include <linux/skbuff.h> | 
 | 24 | #include <linux/random.h> | 
 | 25 | #include <linux/jhash.h> | 
 | 26 | #include <net/ip.h> | 
 | 27 | #include <net/pkt_sched.h> | 
 | 28 | #include <net/inet_ecn.h> | 
 | 29 |  | 
 | 30 | /* | 
 | 31 |  * SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level) | 
 | 32 |  * This implementation uses L = 8 and N = 16 | 
 | 33 |  * This permits us to split one 32bit hash (provided per packet by rxhash or | 
 | 34 |  * external classifier) into 8 subhashes of 4 bits. | 
 | 35 |  */ | 
 | 36 | #define SFB_BUCKET_SHIFT 4 | 
 | 37 | #define SFB_NUMBUCKETS	(1 << SFB_BUCKET_SHIFT) /* N bins per Level */ | 
 | 38 | #define SFB_BUCKET_MASK (SFB_NUMBUCKETS - 1) | 
 | 39 | #define SFB_LEVELS	(32 / SFB_BUCKET_SHIFT) /* L */ | 
 | 40 |  | 
 | 41 | /* SFB algo uses a virtual queue, named "bin" */ | 
 | 42 | struct sfb_bucket { | 
 | 43 | 	u16		qlen; /* length of virtual queue */ | 
 | 44 | 	u16		p_mark; /* marking probability */ | 
 | 45 | }; | 
 | 46 |  | 
 | 47 | /* We use a double buffering right before hash change | 
 | 48 |  * (Section 4.4 of SFB reference : moving hash functions) | 
 | 49 |  */ | 
 | 50 | struct sfb_bins { | 
 | 51 | 	u32		  perturbation; /* jhash perturbation */ | 
 | 52 | 	struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS]; | 
 | 53 | }; | 
 | 54 |  | 
 | 55 | struct sfb_sched_data { | 
 | 56 | 	struct Qdisc	*qdisc; | 
 | 57 | 	struct tcf_proto *filter_list; | 
 | 58 | 	unsigned long	rehash_interval; | 
 | 59 | 	unsigned long	warmup_time;	/* double buffering warmup time in jiffies */ | 
 | 60 | 	u32		max; | 
 | 61 | 	u32		bin_size;	/* maximum queue length per bin */ | 
 | 62 | 	u32		increment;	/* d1 */ | 
 | 63 | 	u32		decrement;	/* d2 */ | 
 | 64 | 	u32		limit;		/* HARD maximal queue length */ | 
 | 65 | 	u32		penalty_rate; | 
 | 66 | 	u32		penalty_burst; | 
 | 67 | 	u32		tokens_avail; | 
 | 68 | 	unsigned long	rehash_time; | 
 | 69 | 	unsigned long	token_time; | 
 | 70 |  | 
 | 71 | 	u8		slot;		/* current active bins (0 or 1) */ | 
 | 72 | 	bool		double_buffering; | 
 | 73 | 	struct sfb_bins bins[2]; | 
 | 74 |  | 
 | 75 | 	struct { | 
 | 76 | 		u32	earlydrop; | 
 | 77 | 		u32	penaltydrop; | 
 | 78 | 		u32	bucketdrop; | 
 | 79 | 		u32	queuedrop; | 
 | 80 | 		u32	childdrop;	/* drops in child qdisc */ | 
 | 81 | 		u32	marked;		/* ECN mark */ | 
 | 82 | 	} stats; | 
 | 83 | }; | 
 | 84 |  | 
 | 85 | /* | 
 | 86 |  * Each queued skb might be hashed on one or two bins | 
 | 87 |  * We store in skb_cb the two hash values. | 
 | 88 |  * (A zero value means double buffering was not used) | 
 | 89 |  */ | 
 | 90 | struct sfb_skb_cb { | 
 | 91 | 	u32 hashes[2]; | 
 | 92 | }; | 
 | 93 |  | 
 | 94 | static inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb) | 
 | 95 | { | 
 | 96 | 	BUILD_BUG_ON(sizeof(skb->cb) < | 
 | 97 | 		sizeof(struct qdisc_skb_cb) + sizeof(struct sfb_skb_cb)); | 
 | 98 | 	return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data; | 
 | 99 | } | 
 | 100 |  | 
 | 101 | /* | 
 | 102 |  * If using 'internal' SFB flow classifier, hash comes from skb rxhash | 
 | 103 |  * If using external classifier, hash comes from the classid. | 
 | 104 |  */ | 
 | 105 | static u32 sfb_hash(const struct sk_buff *skb, u32 slot) | 
 | 106 | { | 
 | 107 | 	return sfb_skb_cb(skb)->hashes[slot]; | 
 | 108 | } | 
 | 109 |  | 
 | 110 | /* Probabilities are coded as Q0.16 fixed-point values, | 
 | 111 |  * with 0xFFFF representing 65535/65536 (almost 1.0) | 
 | 112 |  * Addition and subtraction are saturating in [0, 65535] | 
 | 113 |  */ | 
 | 114 | static u32 prob_plus(u32 p1, u32 p2) | 
 | 115 | { | 
 | 116 | 	u32 res = p1 + p2; | 
 | 117 |  | 
 | 118 | 	return min_t(u32, res, SFB_MAX_PROB); | 
 | 119 | } | 
 | 120 |  | 
 | 121 | static u32 prob_minus(u32 p1, u32 p2) | 
 | 122 | { | 
 | 123 | 	return p1 > p2 ? p1 - p2 : 0; | 
 | 124 | } | 
 | 125 |  | 
 | 126 | static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q) | 
 | 127 | { | 
 | 128 | 	int i; | 
 | 129 | 	struct sfb_bucket *b = &q->bins[slot].bins[0][0]; | 
 | 130 |  | 
 | 131 | 	for (i = 0; i < SFB_LEVELS; i++) { | 
 | 132 | 		u32 hash = sfbhash & SFB_BUCKET_MASK; | 
 | 133 |  | 
 | 134 | 		sfbhash >>= SFB_BUCKET_SHIFT; | 
 | 135 | 		if (b[hash].qlen < 0xFFFF) | 
 | 136 | 			b[hash].qlen++; | 
 | 137 | 		b += SFB_NUMBUCKETS; /* next level */ | 
 | 138 | 	} | 
 | 139 | } | 
 | 140 |  | 
 | 141 | static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) | 
 | 142 | { | 
 | 143 | 	u32 sfbhash; | 
 | 144 |  | 
 | 145 | 	sfbhash = sfb_hash(skb, 0); | 
 | 146 | 	if (sfbhash) | 
 | 147 | 		increment_one_qlen(sfbhash, 0, q); | 
 | 148 |  | 
 | 149 | 	sfbhash = sfb_hash(skb, 1); | 
 | 150 | 	if (sfbhash) | 
 | 151 | 		increment_one_qlen(sfbhash, 1, q); | 
 | 152 | } | 
 | 153 |  | 
 | 154 | static void decrement_one_qlen(u32 sfbhash, u32 slot, | 
 | 155 | 			       struct sfb_sched_data *q) | 
 | 156 | { | 
 | 157 | 	int i; | 
 | 158 | 	struct sfb_bucket *b = &q->bins[slot].bins[0][0]; | 
 | 159 |  | 
 | 160 | 	for (i = 0; i < SFB_LEVELS; i++) { | 
 | 161 | 		u32 hash = sfbhash & SFB_BUCKET_MASK; | 
 | 162 |  | 
 | 163 | 		sfbhash >>= SFB_BUCKET_SHIFT; | 
 | 164 | 		if (b[hash].qlen > 0) | 
 | 165 | 			b[hash].qlen--; | 
 | 166 | 		b += SFB_NUMBUCKETS; /* next level */ | 
 | 167 | 	} | 
 | 168 | } | 
 | 169 |  | 
 | 170 | static void decrement_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) | 
 | 171 | { | 
 | 172 | 	u32 sfbhash; | 
 | 173 |  | 
 | 174 | 	sfbhash = sfb_hash(skb, 0); | 
 | 175 | 	if (sfbhash) | 
 | 176 | 		decrement_one_qlen(sfbhash, 0, q); | 
 | 177 |  | 
 | 178 | 	sfbhash = sfb_hash(skb, 1); | 
 | 179 | 	if (sfbhash) | 
 | 180 | 		decrement_one_qlen(sfbhash, 1, q); | 
 | 181 | } | 
 | 182 |  | 
 | 183 | static void decrement_prob(struct sfb_bucket *b, struct sfb_sched_data *q) | 
 | 184 | { | 
 | 185 | 	b->p_mark = prob_minus(b->p_mark, q->decrement); | 
 | 186 | } | 
 | 187 |  | 
 | 188 | static void increment_prob(struct sfb_bucket *b, struct sfb_sched_data *q) | 
 | 189 | { | 
 | 190 | 	b->p_mark = prob_plus(b->p_mark, q->increment); | 
 | 191 | } | 
 | 192 |  | 
 | 193 | static void sfb_zero_all_buckets(struct sfb_sched_data *q) | 
 | 194 | { | 
 | 195 | 	memset(&q->bins, 0, sizeof(q->bins)); | 
 | 196 | } | 
 | 197 |  | 
 | 198 | /* | 
 | 199 |  * compute max qlen, max p_mark, and avg p_mark | 
 | 200 |  */ | 
 | 201 | static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_data *q) | 
 | 202 | { | 
 | 203 | 	int i; | 
 | 204 | 	u32 qlen = 0, prob = 0, totalpm = 0; | 
 | 205 | 	const struct sfb_bucket *b = &q->bins[q->slot].bins[0][0]; | 
 | 206 |  | 
 | 207 | 	for (i = 0; i < SFB_LEVELS * SFB_NUMBUCKETS; i++) { | 
 | 208 | 		if (qlen < b->qlen) | 
 | 209 | 			qlen = b->qlen; | 
 | 210 | 		totalpm += b->p_mark; | 
 | 211 | 		if (prob < b->p_mark) | 
 | 212 | 			prob = b->p_mark; | 
 | 213 | 		b++; | 
 | 214 | 	} | 
 | 215 | 	*prob_r = prob; | 
 | 216 | 	*avgpm_r = totalpm / (SFB_LEVELS * SFB_NUMBUCKETS); | 
 | 217 | 	return qlen; | 
 | 218 | } | 
 | 219 |  | 
 | 220 |  | 
 | 221 | static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q) | 
 | 222 | { | 
 | 223 | 	q->bins[slot].perturbation = net_random(); | 
 | 224 | } | 
 | 225 |  | 
 | 226 | static void sfb_swap_slot(struct sfb_sched_data *q) | 
 | 227 | { | 
 | 228 | 	sfb_init_perturbation(q->slot, q); | 
 | 229 | 	q->slot ^= 1; | 
 | 230 | 	q->double_buffering = false; | 
 | 231 | } | 
 | 232 |  | 
 | 233 | /* Non elastic flows are allowed to use part of the bandwidth, expressed | 
 | 234 |  * in "penalty_rate" packets per second, with "penalty_burst" burst | 
 | 235 |  */ | 
 | 236 | static bool sfb_rate_limit(struct sk_buff *skb, struct sfb_sched_data *q) | 
 | 237 | { | 
 | 238 | 	if (q->penalty_rate == 0 || q->penalty_burst == 0) | 
 | 239 | 		return true; | 
 | 240 |  | 
 | 241 | 	if (q->tokens_avail < 1) { | 
 | 242 | 		unsigned long age = min(10UL * HZ, jiffies - q->token_time); | 
 | 243 |  | 
 | 244 | 		q->tokens_avail = (age * q->penalty_rate) / HZ; | 
 | 245 | 		if (q->tokens_avail > q->penalty_burst) | 
 | 246 | 			q->tokens_avail = q->penalty_burst; | 
 | 247 | 		q->token_time = jiffies; | 
 | 248 | 		if (q->tokens_avail < 1) | 
 | 249 | 			return true; | 
 | 250 | 	} | 
 | 251 |  | 
 | 252 | 	q->tokens_avail--; | 
 | 253 | 	return false; | 
 | 254 | } | 
 | 255 |  | 
 | 256 | static bool sfb_classify(struct sk_buff *skb, struct sfb_sched_data *q, | 
 | 257 | 			 int *qerr, u32 *salt) | 
 | 258 | { | 
 | 259 | 	struct tcf_result res; | 
 | 260 | 	int result; | 
 | 261 |  | 
 | 262 | 	result = tc_classify(skb, q->filter_list, &res); | 
 | 263 | 	if (result >= 0) { | 
 | 264 | #ifdef CONFIG_NET_CLS_ACT | 
 | 265 | 		switch (result) { | 
 | 266 | 		case TC_ACT_STOLEN: | 
 | 267 | 		case TC_ACT_QUEUED: | 
 | 268 | 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; | 
 | 269 | 		case TC_ACT_SHOT: | 
 | 270 | 			return false; | 
 | 271 | 		} | 
 | 272 | #endif | 
 | 273 | 		*salt = TC_H_MIN(res.classid); | 
 | 274 | 		return true; | 
 | 275 | 	} | 
 | 276 | 	return false; | 
 | 277 | } | 
 | 278 |  | 
 | 279 | static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) | 
 | 280 | { | 
 | 281 |  | 
 | 282 | 	struct sfb_sched_data *q = qdisc_priv(sch); | 
 | 283 | 	struct Qdisc *child = q->qdisc; | 
 | 284 | 	int i; | 
 | 285 | 	u32 p_min = ~0; | 
 | 286 | 	u32 minqlen = ~0; | 
 | 287 | 	u32 r, slot, salt, sfbhash; | 
 | 288 | 	int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; | 
 | 289 |  | 
 | 290 | 	if (q->rehash_interval > 0) { | 
 | 291 | 		unsigned long limit = q->rehash_time + q->rehash_interval; | 
 | 292 |  | 
 | 293 | 		if (unlikely(time_after(jiffies, limit))) { | 
 | 294 | 			sfb_swap_slot(q); | 
 | 295 | 			q->rehash_time = jiffies; | 
 | 296 | 		} else if (unlikely(!q->double_buffering && q->warmup_time > 0 && | 
 | 297 | 				    time_after(jiffies, limit - q->warmup_time))) { | 
 | 298 | 			q->double_buffering = true; | 
 | 299 | 		} | 
 | 300 | 	} | 
 | 301 |  | 
 | 302 | 	if (q->filter_list) { | 
 | 303 | 		/* If using external classifiers, get result and record it. */ | 
 | 304 | 		if (!sfb_classify(skb, q, &ret, &salt)) | 
 | 305 | 			goto other_drop; | 
 | 306 | 	} else { | 
 | 307 | 		salt = skb_get_rxhash(skb); | 
 | 308 | 	} | 
 | 309 |  | 
 | 310 | 	slot = q->slot; | 
 | 311 |  | 
 | 312 | 	sfbhash = jhash_1word(salt, q->bins[slot].perturbation); | 
 | 313 | 	if (!sfbhash) | 
 | 314 | 		sfbhash = 1; | 
 | 315 | 	sfb_skb_cb(skb)->hashes[slot] = sfbhash; | 
 | 316 |  | 
 | 317 | 	for (i = 0; i < SFB_LEVELS; i++) { | 
 | 318 | 		u32 hash = sfbhash & SFB_BUCKET_MASK; | 
 | 319 | 		struct sfb_bucket *b = &q->bins[slot].bins[i][hash]; | 
 | 320 |  | 
 | 321 | 		sfbhash >>= SFB_BUCKET_SHIFT; | 
 | 322 | 		if (b->qlen == 0) | 
 | 323 | 			decrement_prob(b, q); | 
 | 324 | 		else if (b->qlen >= q->bin_size) | 
 | 325 | 			increment_prob(b, q); | 
 | 326 | 		if (minqlen > b->qlen) | 
 | 327 | 			minqlen = b->qlen; | 
 | 328 | 		if (p_min > b->p_mark) | 
 | 329 | 			p_min = b->p_mark; | 
 | 330 | 	} | 
 | 331 |  | 
 | 332 | 	slot ^= 1; | 
 | 333 | 	sfb_skb_cb(skb)->hashes[slot] = 0; | 
 | 334 |  | 
 | 335 | 	if (unlikely(minqlen >= q->max || sch->q.qlen >= q->limit)) { | 
 | 336 | 		sch->qstats.overlimits++; | 
 | 337 | 		if (minqlen >= q->max) | 
 | 338 | 			q->stats.bucketdrop++; | 
 | 339 | 		else | 
 | 340 | 			q->stats.queuedrop++; | 
 | 341 | 		goto drop; | 
 | 342 | 	} | 
 | 343 |  | 
 | 344 | 	if (unlikely(p_min >= SFB_MAX_PROB)) { | 
 | 345 | 		/* Inelastic flow */ | 
 | 346 | 		if (q->double_buffering) { | 
 | 347 | 			sfbhash = jhash_1word(salt, q->bins[slot].perturbation); | 
 | 348 | 			if (!sfbhash) | 
 | 349 | 				sfbhash = 1; | 
 | 350 | 			sfb_skb_cb(skb)->hashes[slot] = sfbhash; | 
 | 351 |  | 
 | 352 | 			for (i = 0; i < SFB_LEVELS; i++) { | 
 | 353 | 				u32 hash = sfbhash & SFB_BUCKET_MASK; | 
 | 354 | 				struct sfb_bucket *b = &q->bins[slot].bins[i][hash]; | 
 | 355 |  | 
 | 356 | 				sfbhash >>= SFB_BUCKET_SHIFT; | 
 | 357 | 				if (b->qlen == 0) | 
 | 358 | 					decrement_prob(b, q); | 
 | 359 | 				else if (b->qlen >= q->bin_size) | 
 | 360 | 					increment_prob(b, q); | 
 | 361 | 			} | 
 | 362 | 		} | 
 | 363 | 		if (sfb_rate_limit(skb, q)) { | 
 | 364 | 			sch->qstats.overlimits++; | 
 | 365 | 			q->stats.penaltydrop++; | 
 | 366 | 			goto drop; | 
 | 367 | 		} | 
 | 368 | 		goto enqueue; | 
 | 369 | 	} | 
 | 370 |  | 
 | 371 | 	r = net_random() & SFB_MAX_PROB; | 
 | 372 |  | 
 | 373 | 	if (unlikely(r < p_min)) { | 
 | 374 | 		if (unlikely(p_min > SFB_MAX_PROB / 2)) { | 
 | 375 | 			/* If we're marking that many packets, then either | 
 | 376 | 			 * this flow is unresponsive, or we're badly congested. | 
 | 377 | 			 * In either case, we want to start dropping packets. | 
 | 378 | 			 */ | 
 | 379 | 			if (r < (p_min - SFB_MAX_PROB / 2) * 2) { | 
 | 380 | 				q->stats.earlydrop++; | 
 | 381 | 				goto drop; | 
 | 382 | 			} | 
 | 383 | 		} | 
 | 384 | 		if (INET_ECN_set_ce(skb)) { | 
 | 385 | 			q->stats.marked++; | 
 | 386 | 		} else { | 
 | 387 | 			q->stats.earlydrop++; | 
 | 388 | 			goto drop; | 
 | 389 | 		} | 
 | 390 | 	} | 
 | 391 |  | 
 | 392 | enqueue: | 
 | 393 | 	ret = qdisc_enqueue(skb, child); | 
 | 394 | 	if (likely(ret == NET_XMIT_SUCCESS)) { | 
 | 395 | 		sch->q.qlen++; | 
 | 396 | 		increment_qlen(skb, q); | 
 | 397 | 	} else if (net_xmit_drop_count(ret)) { | 
 | 398 | 		q->stats.childdrop++; | 
 | 399 | 		sch->qstats.drops++; | 
 | 400 | 	} | 
 | 401 | 	return ret; | 
 | 402 |  | 
 | 403 | drop: | 
 | 404 | 	qdisc_drop(skb, sch); | 
 | 405 | 	return NET_XMIT_CN; | 
 | 406 | other_drop: | 
 | 407 | 	if (ret & __NET_XMIT_BYPASS) | 
 | 408 | 		sch->qstats.drops++; | 
 | 409 | 	kfree_skb(skb); | 
 | 410 | 	return ret; | 
 | 411 | } | 
 | 412 |  | 
 | 413 | static struct sk_buff *sfb_dequeue(struct Qdisc *sch) | 
 | 414 | { | 
 | 415 | 	struct sfb_sched_data *q = qdisc_priv(sch); | 
 | 416 | 	struct Qdisc *child = q->qdisc; | 
 | 417 | 	struct sk_buff *skb; | 
 | 418 |  | 
 | 419 | 	skb = child->dequeue(q->qdisc); | 
 | 420 |  | 
 | 421 | 	if (skb) { | 
 | 422 | 		qdisc_bstats_update(sch, skb); | 
 | 423 | 		sch->q.qlen--; | 
 | 424 | 		decrement_qlen(skb, q); | 
 | 425 | 	} | 
 | 426 |  | 
 | 427 | 	return skb; | 
 | 428 | } | 
 | 429 |  | 
 | 430 | static struct sk_buff *sfb_peek(struct Qdisc *sch) | 
 | 431 | { | 
 | 432 | 	struct sfb_sched_data *q = qdisc_priv(sch); | 
 | 433 | 	struct Qdisc *child = q->qdisc; | 
 | 434 |  | 
 | 435 | 	return child->ops->peek(child); | 
 | 436 | } | 
 | 437 |  | 
 | 438 | /* No sfb_drop -- impossible since the child doesn't return the dropped skb. */ | 
 | 439 |  | 
 | 440 | static void sfb_reset(struct Qdisc *sch) | 
 | 441 | { | 
 | 442 | 	struct sfb_sched_data *q = qdisc_priv(sch); | 
 | 443 |  | 
 | 444 | 	qdisc_reset(q->qdisc); | 
 | 445 | 	sch->q.qlen = 0; | 
 | 446 | 	q->slot = 0; | 
 | 447 | 	q->double_buffering = false; | 
 | 448 | 	sfb_zero_all_buckets(q); | 
 | 449 | 	sfb_init_perturbation(0, q); | 
 | 450 | } | 
 | 451 |  | 
 | 452 | static void sfb_destroy(struct Qdisc *sch) | 
 | 453 | { | 
 | 454 | 	struct sfb_sched_data *q = qdisc_priv(sch); | 
 | 455 |  | 
 | 456 | 	tcf_destroy_chain(&q->filter_list); | 
 | 457 | 	qdisc_destroy(q->qdisc); | 
 | 458 | } | 
 | 459 |  | 
 | 460 | static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = { | 
 | 461 | 	[TCA_SFB_PARMS]	= { .len = sizeof(struct tc_sfb_qopt) }, | 
 | 462 | }; | 
 | 463 |  | 
 | 464 | static const struct tc_sfb_qopt sfb_default_ops = { | 
 | 465 | 	.rehash_interval = 600 * MSEC_PER_SEC, | 
 | 466 | 	.warmup_time = 60 * MSEC_PER_SEC, | 
 | 467 | 	.limit = 0, | 
 | 468 | 	.max = 25, | 
 | 469 | 	.bin_size = 20, | 
 | 470 | 	.increment = (SFB_MAX_PROB + 500) / 1000, /* 0.1 % */ | 
 | 471 | 	.decrement = (SFB_MAX_PROB + 3000) / 6000, | 
 | 472 | 	.penalty_rate = 10, | 
 | 473 | 	.penalty_burst = 20, | 
 | 474 | }; | 
 | 475 |  | 
 | 476 | static int sfb_change(struct Qdisc *sch, struct nlattr *opt) | 
 | 477 | { | 
 | 478 | 	struct sfb_sched_data *q = qdisc_priv(sch); | 
 | 479 | 	struct Qdisc *child; | 
 | 480 | 	struct nlattr *tb[TCA_SFB_MAX + 1]; | 
 | 481 | 	const struct tc_sfb_qopt *ctl = &sfb_default_ops; | 
 | 482 | 	u32 limit; | 
 | 483 | 	int err; | 
 | 484 |  | 
 | 485 | 	if (opt) { | 
 | 486 | 		err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy); | 
 | 487 | 		if (err < 0) | 
 | 488 | 			return -EINVAL; | 
 | 489 |  | 
 | 490 | 		if (tb[TCA_SFB_PARMS] == NULL) | 
 | 491 | 			return -EINVAL; | 
 | 492 |  | 
 | 493 | 		ctl = nla_data(tb[TCA_SFB_PARMS]); | 
 | 494 | 	} | 
 | 495 |  | 
 | 496 | 	limit = ctl->limit; | 
 | 497 | 	if (limit == 0) | 
 | 498 | 		limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1); | 
 | 499 |  | 
 | 500 | 	child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit); | 
 | 501 | 	if (IS_ERR(child)) | 
 | 502 | 		return PTR_ERR(child); | 
 | 503 |  | 
 | 504 | 	sch_tree_lock(sch); | 
 | 505 |  | 
 | 506 | 	qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); | 
 | 507 | 	qdisc_destroy(q->qdisc); | 
 | 508 | 	q->qdisc = child; | 
 | 509 |  | 
 | 510 | 	q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval); | 
 | 511 | 	q->warmup_time = msecs_to_jiffies(ctl->warmup_time); | 
 | 512 | 	q->rehash_time = jiffies; | 
 | 513 | 	q->limit = limit; | 
 | 514 | 	q->increment = ctl->increment; | 
 | 515 | 	q->decrement = ctl->decrement; | 
 | 516 | 	q->max = ctl->max; | 
 | 517 | 	q->bin_size = ctl->bin_size; | 
 | 518 | 	q->penalty_rate = ctl->penalty_rate; | 
 | 519 | 	q->penalty_burst = ctl->penalty_burst; | 
 | 520 | 	q->tokens_avail = ctl->penalty_burst; | 
 | 521 | 	q->token_time = jiffies; | 
 | 522 |  | 
 | 523 | 	q->slot = 0; | 
 | 524 | 	q->double_buffering = false; | 
 | 525 | 	sfb_zero_all_buckets(q); | 
 | 526 | 	sfb_init_perturbation(0, q); | 
 | 527 | 	sfb_init_perturbation(1, q); | 
 | 528 |  | 
 | 529 | 	sch_tree_unlock(sch); | 
 | 530 |  | 
 | 531 | 	return 0; | 
 | 532 | } | 
 | 533 |  | 
 | 534 | static int sfb_init(struct Qdisc *sch, struct nlattr *opt) | 
 | 535 | { | 
 | 536 | 	struct sfb_sched_data *q = qdisc_priv(sch); | 
 | 537 |  | 
 | 538 | 	q->qdisc = &noop_qdisc; | 
 | 539 | 	return sfb_change(sch, opt); | 
 | 540 | } | 
 | 541 |  | 
 | 542 | static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb) | 
 | 543 | { | 
 | 544 | 	struct sfb_sched_data *q = qdisc_priv(sch); | 
 | 545 | 	struct nlattr *opts; | 
 | 546 | 	struct tc_sfb_qopt opt = { | 
 | 547 | 		.rehash_interval = jiffies_to_msecs(q->rehash_interval), | 
 | 548 | 		.warmup_time = jiffies_to_msecs(q->warmup_time), | 
 | 549 | 		.limit = q->limit, | 
 | 550 | 		.max = q->max, | 
 | 551 | 		.bin_size = q->bin_size, | 
 | 552 | 		.increment = q->increment, | 
 | 553 | 		.decrement = q->decrement, | 
 | 554 | 		.penalty_rate = q->penalty_rate, | 
 | 555 | 		.penalty_burst = q->penalty_burst, | 
 | 556 | 	}; | 
 | 557 |  | 
 | 558 | 	sch->qstats.backlog = q->qdisc->qstats.backlog; | 
 | 559 | 	opts = nla_nest_start(skb, TCA_OPTIONS); | 
 | 560 | 	NLA_PUT(skb, TCA_SFB_PARMS, sizeof(opt), &opt); | 
 | 561 | 	return nla_nest_end(skb, opts); | 
 | 562 |  | 
 | 563 | nla_put_failure: | 
 | 564 | 	nla_nest_cancel(skb, opts); | 
 | 565 | 	return -EMSGSIZE; | 
 | 566 | } | 
 | 567 |  | 
 | 568 | static int sfb_dump_stats(struct Qdisc *sch, struct gnet_dump *d) | 
 | 569 | { | 
 | 570 | 	struct sfb_sched_data *q = qdisc_priv(sch); | 
 | 571 | 	struct tc_sfb_xstats st = { | 
 | 572 | 		.earlydrop = q->stats.earlydrop, | 
 | 573 | 		.penaltydrop = q->stats.penaltydrop, | 
 | 574 | 		.bucketdrop = q->stats.bucketdrop, | 
 | 575 | 		.queuedrop = q->stats.queuedrop, | 
 | 576 | 		.childdrop = q->stats.childdrop, | 
 | 577 | 		.marked = q->stats.marked, | 
 | 578 | 	}; | 
 | 579 |  | 
 | 580 | 	st.maxqlen = sfb_compute_qlen(&st.maxprob, &st.avgprob, q); | 
 | 581 |  | 
 | 582 | 	return gnet_stats_copy_app(d, &st, sizeof(st)); | 
 | 583 | } | 
 | 584 |  | 
 | 585 | static int sfb_dump_class(struct Qdisc *sch, unsigned long cl, | 
 | 586 | 			  struct sk_buff *skb, struct tcmsg *tcm) | 
 | 587 | { | 
 | 588 | 	return -ENOSYS; | 
 | 589 | } | 
 | 590 |  | 
 | 591 | static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, | 
 | 592 | 		     struct Qdisc **old) | 
 | 593 | { | 
 | 594 | 	struct sfb_sched_data *q = qdisc_priv(sch); | 
 | 595 |  | 
 | 596 | 	if (new == NULL) | 
 | 597 | 		new = &noop_qdisc; | 
 | 598 |  | 
 | 599 | 	sch_tree_lock(sch); | 
 | 600 | 	*old = q->qdisc; | 
 | 601 | 	q->qdisc = new; | 
 | 602 | 	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); | 
 | 603 | 	qdisc_reset(*old); | 
 | 604 | 	sch_tree_unlock(sch); | 
 | 605 | 	return 0; | 
 | 606 | } | 
 | 607 |  | 
 | 608 | static struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg) | 
 | 609 | { | 
 | 610 | 	struct sfb_sched_data *q = qdisc_priv(sch); | 
 | 611 |  | 
 | 612 | 	return q->qdisc; | 
 | 613 | } | 
 | 614 |  | 
 | 615 | static unsigned long sfb_get(struct Qdisc *sch, u32 classid) | 
 | 616 | { | 
 | 617 | 	return 1; | 
 | 618 | } | 
 | 619 |  | 
 | 620 | static void sfb_put(struct Qdisc *sch, unsigned long arg) | 
 | 621 | { | 
 | 622 | } | 
 | 623 |  | 
 | 624 | static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid, | 
 | 625 | 			    struct nlattr **tca, unsigned long *arg) | 
 | 626 | { | 
 | 627 | 	return -ENOSYS; | 
 | 628 | } | 
 | 629 |  | 
 | 630 | static int sfb_delete(struct Qdisc *sch, unsigned long cl) | 
 | 631 | { | 
 | 632 | 	return -ENOSYS; | 
 | 633 | } | 
 | 634 |  | 
 | 635 | static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker) | 
 | 636 | { | 
 | 637 | 	if (!walker->stop) { | 
 | 638 | 		if (walker->count >= walker->skip) | 
 | 639 | 			if (walker->fn(sch, 1, walker) < 0) { | 
 | 640 | 				walker->stop = 1; | 
 | 641 | 				return; | 
 | 642 | 			} | 
 | 643 | 		walker->count++; | 
 | 644 | 	} | 
 | 645 | } | 
 | 646 |  | 
 | 647 | static struct tcf_proto **sfb_find_tcf(struct Qdisc *sch, unsigned long cl) | 
 | 648 | { | 
 | 649 | 	struct sfb_sched_data *q = qdisc_priv(sch); | 
 | 650 |  | 
 | 651 | 	if (cl) | 
 | 652 | 		return NULL; | 
 | 653 | 	return &q->filter_list; | 
 | 654 | } | 
 | 655 |  | 
 | 656 | static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent, | 
 | 657 | 			      u32 classid) | 
 | 658 | { | 
 | 659 | 	return 0; | 
 | 660 | } | 
 | 661 |  | 
 | 662 |  | 
 | 663 | static const struct Qdisc_class_ops sfb_class_ops = { | 
 | 664 | 	.graft		=	sfb_graft, | 
 | 665 | 	.leaf		=	sfb_leaf, | 
 | 666 | 	.get		=	sfb_get, | 
 | 667 | 	.put		=	sfb_put, | 
 | 668 | 	.change		=	sfb_change_class, | 
 | 669 | 	.delete		=	sfb_delete, | 
 | 670 | 	.walk		=	sfb_walk, | 
 | 671 | 	.tcf_chain	=	sfb_find_tcf, | 
 | 672 | 	.bind_tcf	=	sfb_bind, | 
 | 673 | 	.unbind_tcf	=	sfb_put, | 
 | 674 | 	.dump		=	sfb_dump_class, | 
 | 675 | }; | 
 | 676 |  | 
 | 677 | static struct Qdisc_ops sfb_qdisc_ops __read_mostly = { | 
 | 678 | 	.id		=	"sfb", | 
 | 679 | 	.priv_size	=	sizeof(struct sfb_sched_data), | 
 | 680 | 	.cl_ops		=	&sfb_class_ops, | 
 | 681 | 	.enqueue	=	sfb_enqueue, | 
 | 682 | 	.dequeue	=	sfb_dequeue, | 
 | 683 | 	.peek		=	sfb_peek, | 
 | 684 | 	.init		=	sfb_init, | 
 | 685 | 	.reset		=	sfb_reset, | 
 | 686 | 	.destroy	=	sfb_destroy, | 
 | 687 | 	.change		=	sfb_change, | 
 | 688 | 	.dump		=	sfb_dump, | 
 | 689 | 	.dump_stats	=	sfb_dump_stats, | 
 | 690 | 	.owner		=	THIS_MODULE, | 
 | 691 | }; | 
 | 692 |  | 
 | 693 | static int __init sfb_module_init(void) | 
 | 694 | { | 
 | 695 | 	return register_qdisc(&sfb_qdisc_ops); | 
 | 696 | } | 
 | 697 |  | 
 | 698 | static void __exit sfb_module_exit(void) | 
 | 699 | { | 
 | 700 | 	unregister_qdisc(&sfb_qdisc_ops); | 
 | 701 | } | 
 | 702 |  | 
 | 703 | module_init(sfb_module_init) | 
 | 704 | module_exit(sfb_module_exit) | 
 | 705 |  | 
 | 706 | MODULE_DESCRIPTION("Stochastic Fair Blue queue discipline"); | 
 | 707 | MODULE_AUTHOR("Juliusz Chroboczek"); | 
 | 708 | MODULE_AUTHOR("Eric Dumazet"); | 
 | 709 | MODULE_LICENSE("GPL"); |