| Eric Dumazet | 76e3cc1 | 2012-05-10 07:51:25 +0000 | [diff] [blame] | 1 | /* | 
 | 2 |  * Codel - The Controlled-Delay Active Queue Management algorithm | 
 | 3 |  * | 
 | 4 |  *  Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> | 
 | 5 |  *  Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> | 
 | 6 |  * | 
 | 7 |  *  Implemented on linux by : | 
 | 8 |  *  Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> | 
 | 9 |  *  Copyright (C) 2012 Eric Dumazet <edumazet@google.com> | 
 | 10 |  * | 
 | 11 |  * Redistribution and use in source and binary forms, with or without | 
 | 12 |  * modification, are permitted provided that the following conditions | 
 | 13 |  * are met: | 
 | 14 |  * 1. Redistributions of source code must retain the above copyright | 
 | 15 |  *    notice, this list of conditions, and the following disclaimer, | 
 | 16 |  *    without modification. | 
 | 17 |  * 2. Redistributions in binary form must reproduce the above copyright | 
 | 18 |  *    notice, this list of conditions and the following disclaimer in the | 
 | 19 |  *    documentation and/or other materials provided with the distribution. | 
 | 20 |  * 3. The names of the authors may not be used to endorse or promote products | 
 | 21 |  *    derived from this software without specific prior written permission. | 
 | 22 |  * | 
 | 23 |  * Alternatively, provided that this notice is retained in full, this | 
 | 24 |  * software may be distributed under the terms of the GNU General | 
 | 25 |  * Public License ("GPL") version 2, in which case the provisions of the | 
 | 26 |  * GPL apply INSTEAD OF those given above. | 
 | 27 |  * | 
 | 28 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
 | 29 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
 | 30 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
 | 31 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 
 | 32 |  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 
 | 33 |  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 
 | 34 |  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 
 | 35 |  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 
 | 36 |  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
 | 37 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
 | 38 |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH | 
 | 39 |  * DAMAGE. | 
 | 40 |  * | 
 | 41 |  */ | 
 | 42 |  | 
 | 43 | #include <linux/module.h> | 
 | 44 | #include <linux/slab.h> | 
 | 45 | #include <linux/types.h> | 
 | 46 | #include <linux/kernel.h> | 
 | 47 | #include <linux/errno.h> | 
 | 48 | #include <linux/skbuff.h> | 
| Geert Uytterhoeven | ce5b4b9 | 2012-05-14 09:47:05 +0000 | [diff] [blame] | 49 | #include <linux/prefetch.h> | 
| Eric Dumazet | 76e3cc1 | 2012-05-10 07:51:25 +0000 | [diff] [blame] | 50 | #include <net/pkt_sched.h> | 
 | 51 | #include <net/codel.h> | 
 | 52 |  | 
 | 53 |  | 
 | 54 | #define DEFAULT_CODEL_LIMIT 1000 | 
 | 55 |  | 
 | 56 | struct codel_sched_data { | 
 | 57 | 	struct codel_params	params; | 
 | 58 | 	struct codel_vars	vars; | 
 | 59 | 	struct codel_stats	stats; | 
 | 60 | 	u32			drop_overlimit; | 
 | 61 | }; | 
 | 62 |  | 
 | 63 | /* This is the specific function called from codel_dequeue() | 
 | 64 |  * to dequeue a packet from queue. Note: backlog is handled in | 
 | 65 |  * codel, we dont need to reduce it here. | 
 | 66 |  */ | 
 | 67 | static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch) | 
 | 68 | { | 
 | 69 | 	struct sk_buff *skb = __skb_dequeue(&sch->q); | 
 | 70 |  | 
 | 71 | 	prefetch(&skb->end); /* we'll need skb_shinfo() */ | 
 | 72 | 	return skb; | 
 | 73 | } | 
 | 74 |  | 
 | 75 | static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch) | 
 | 76 | { | 
 | 77 | 	struct codel_sched_data *q = qdisc_priv(sch); | 
 | 78 | 	struct sk_buff *skb; | 
 | 79 |  | 
| Eric Dumazet | 865ec55 | 2012-05-16 04:39:09 +0000 | [diff] [blame] | 80 | 	skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue); | 
 | 81 |  | 
| Eric Dumazet | 76e3cc1 | 2012-05-10 07:51:25 +0000 | [diff] [blame] | 82 | 	/* We cant call qdisc_tree_decrease_qlen() if our qlen is 0, | 
 | 83 | 	 * or HTB crashes. Defer it for next round. | 
 | 84 | 	 */ | 
 | 85 | 	if (q->stats.drop_count && sch->q.qlen) { | 
 | 86 | 		qdisc_tree_decrease_qlen(sch, q->stats.drop_count); | 
 | 87 | 		q->stats.drop_count = 0; | 
 | 88 | 	} | 
 | 89 | 	if (skb) | 
 | 90 | 		qdisc_bstats_update(sch, skb); | 
 | 91 | 	return skb; | 
 | 92 | } | 
 | 93 |  | 
 | 94 | static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) | 
 | 95 | { | 
 | 96 | 	struct codel_sched_data *q; | 
 | 97 |  | 
 | 98 | 	if (likely(qdisc_qlen(sch) < sch->limit)) { | 
 | 99 | 		codel_set_enqueue_time(skb); | 
 | 100 | 		return qdisc_enqueue_tail(skb, sch); | 
 | 101 | 	} | 
 | 102 | 	q = qdisc_priv(sch); | 
 | 103 | 	q->drop_overlimit++; | 
 | 104 | 	return qdisc_drop(skb, sch); | 
 | 105 | } | 
 | 106 |  | 
 | 107 | static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = { | 
 | 108 | 	[TCA_CODEL_TARGET]	= { .type = NLA_U32 }, | 
 | 109 | 	[TCA_CODEL_LIMIT]	= { .type = NLA_U32 }, | 
 | 110 | 	[TCA_CODEL_INTERVAL]	= { .type = NLA_U32 }, | 
 | 111 | 	[TCA_CODEL_ECN]		= { .type = NLA_U32 }, | 
 | 112 | }; | 
 | 113 |  | 
 | 114 | static int codel_change(struct Qdisc *sch, struct nlattr *opt) | 
 | 115 | { | 
 | 116 | 	struct codel_sched_data *q = qdisc_priv(sch); | 
 | 117 | 	struct nlattr *tb[TCA_CODEL_MAX + 1]; | 
 | 118 | 	unsigned int qlen; | 
 | 119 | 	int err; | 
 | 120 |  | 
 | 121 | 	if (!opt) | 
 | 122 | 		return -EINVAL; | 
 | 123 |  | 
 | 124 | 	err = nla_parse_nested(tb, TCA_CODEL_MAX, opt, codel_policy); | 
 | 125 | 	if (err < 0) | 
 | 126 | 		return err; | 
 | 127 |  | 
 | 128 | 	sch_tree_lock(sch); | 
 | 129 |  | 
 | 130 | 	if (tb[TCA_CODEL_TARGET]) { | 
 | 131 | 		u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]); | 
 | 132 |  | 
 | 133 | 		q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT; | 
 | 134 | 	} | 
 | 135 |  | 
 | 136 | 	if (tb[TCA_CODEL_INTERVAL]) { | 
 | 137 | 		u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]); | 
 | 138 |  | 
 | 139 | 		q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT; | 
 | 140 | 	} | 
 | 141 |  | 
 | 142 | 	if (tb[TCA_CODEL_LIMIT]) | 
 | 143 | 		sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]); | 
 | 144 |  | 
 | 145 | 	if (tb[TCA_CODEL_ECN]) | 
 | 146 | 		q->params.ecn = !!nla_get_u32(tb[TCA_CODEL_ECN]); | 
 | 147 |  | 
 | 148 | 	qlen = sch->q.qlen; | 
 | 149 | 	while (sch->q.qlen > sch->limit) { | 
 | 150 | 		struct sk_buff *skb = __skb_dequeue(&sch->q); | 
 | 151 |  | 
 | 152 | 		sch->qstats.backlog -= qdisc_pkt_len(skb); | 
 | 153 | 		qdisc_drop(skb, sch); | 
 | 154 | 	} | 
 | 155 | 	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); | 
 | 156 |  | 
 | 157 | 	sch_tree_unlock(sch); | 
 | 158 | 	return 0; | 
 | 159 | } | 
 | 160 |  | 
 | 161 | static int codel_init(struct Qdisc *sch, struct nlattr *opt) | 
 | 162 | { | 
 | 163 | 	struct codel_sched_data *q = qdisc_priv(sch); | 
 | 164 |  | 
 | 165 | 	sch->limit = DEFAULT_CODEL_LIMIT; | 
 | 166 |  | 
 | 167 | 	codel_params_init(&q->params); | 
 | 168 | 	codel_vars_init(&q->vars); | 
 | 169 | 	codel_stats_init(&q->stats); | 
 | 170 |  | 
 | 171 | 	if (opt) { | 
 | 172 | 		int err = codel_change(sch, opt); | 
 | 173 |  | 
 | 174 | 		if (err) | 
 | 175 | 			return err; | 
 | 176 | 	} | 
 | 177 |  | 
 | 178 | 	if (sch->limit >= 1) | 
 | 179 | 		sch->flags |= TCQ_F_CAN_BYPASS; | 
 | 180 | 	else | 
 | 181 | 		sch->flags &= ~TCQ_F_CAN_BYPASS; | 
 | 182 |  | 
 | 183 | 	return 0; | 
 | 184 | } | 
 | 185 |  | 
 | 186 | static int codel_dump(struct Qdisc *sch, struct sk_buff *skb) | 
 | 187 | { | 
 | 188 | 	struct codel_sched_data *q = qdisc_priv(sch); | 
 | 189 | 	struct nlattr *opts; | 
 | 190 |  | 
 | 191 | 	opts = nla_nest_start(skb, TCA_OPTIONS); | 
 | 192 | 	if (opts == NULL) | 
 | 193 | 		goto nla_put_failure; | 
 | 194 |  | 
 | 195 | 	if (nla_put_u32(skb, TCA_CODEL_TARGET, | 
 | 196 | 			codel_time_to_us(q->params.target)) || | 
 | 197 | 	    nla_put_u32(skb, TCA_CODEL_LIMIT, | 
 | 198 | 			sch->limit) || | 
 | 199 | 	    nla_put_u32(skb, TCA_CODEL_INTERVAL, | 
 | 200 | 			codel_time_to_us(q->params.interval)) || | 
 | 201 | 	    nla_put_u32(skb, TCA_CODEL_ECN, | 
 | 202 | 			q->params.ecn)) | 
 | 203 | 		goto nla_put_failure; | 
 | 204 |  | 
 | 205 | 	return nla_nest_end(skb, opts); | 
 | 206 |  | 
 | 207 | nla_put_failure: | 
 | 208 | 	nla_nest_cancel(skb, opts); | 
 | 209 | 	return -1; | 
 | 210 | } | 
 | 211 |  | 
 | 212 | static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) | 
 | 213 | { | 
 | 214 | 	const struct codel_sched_data *q = qdisc_priv(sch); | 
 | 215 | 	struct tc_codel_xstats st = { | 
 | 216 | 		.maxpacket	= q->stats.maxpacket, | 
 | 217 | 		.count		= q->vars.count, | 
 | 218 | 		.lastcount	= q->vars.lastcount, | 
 | 219 | 		.drop_overlimit = q->drop_overlimit, | 
 | 220 | 		.ldelay		= codel_time_to_us(q->vars.ldelay), | 
 | 221 | 		.dropping	= q->vars.dropping, | 
 | 222 | 		.ecn_mark	= q->stats.ecn_mark, | 
 | 223 | 	}; | 
 | 224 |  | 
 | 225 | 	if (q->vars.dropping) { | 
 | 226 | 		codel_tdiff_t delta = q->vars.drop_next - codel_get_time(); | 
 | 227 |  | 
 | 228 | 		if (delta >= 0) | 
 | 229 | 			st.drop_next = codel_time_to_us(delta); | 
 | 230 | 		else | 
 | 231 | 			st.drop_next = -codel_time_to_us(-delta); | 
 | 232 | 	} | 
 | 233 |  | 
 | 234 | 	return gnet_stats_copy_app(d, &st, sizeof(st)); | 
 | 235 | } | 
 | 236 |  | 
 | 237 | static void codel_reset(struct Qdisc *sch) | 
 | 238 | { | 
 | 239 | 	struct codel_sched_data *q = qdisc_priv(sch); | 
 | 240 |  | 
 | 241 | 	qdisc_reset_queue(sch); | 
 | 242 | 	codel_vars_init(&q->vars); | 
 | 243 | } | 
 | 244 |  | 
 | 245 | static struct Qdisc_ops codel_qdisc_ops __read_mostly = { | 
 | 246 | 	.id		=	"codel", | 
 | 247 | 	.priv_size	=	sizeof(struct codel_sched_data), | 
 | 248 |  | 
 | 249 | 	.enqueue	=	codel_qdisc_enqueue, | 
 | 250 | 	.dequeue	=	codel_qdisc_dequeue, | 
 | 251 | 	.peek		=	qdisc_peek_dequeued, | 
 | 252 | 	.init		=	codel_init, | 
 | 253 | 	.reset		=	codel_reset, | 
 | 254 | 	.change 	=	codel_change, | 
 | 255 | 	.dump		=	codel_dump, | 
 | 256 | 	.dump_stats	=	codel_dump_stats, | 
 | 257 | 	.owner		=	THIS_MODULE, | 
 | 258 | }; | 
 | 259 |  | 
 | 260 | static int __init codel_module_init(void) | 
 | 261 | { | 
 | 262 | 	return register_qdisc(&codel_qdisc_ops); | 
 | 263 | } | 
 | 264 |  | 
 | 265 | static void __exit codel_module_exit(void) | 
 | 266 | { | 
 | 267 | 	unregister_qdisc(&codel_qdisc_ops); | 
 | 268 | } | 
 | 269 |  | 
 | 270 | module_init(codel_module_init) | 
 | 271 | module_exit(codel_module_exit) | 
 | 272 |  | 
 | 273 | MODULE_DESCRIPTION("Controlled Delay queue discipline"); | 
 | 274 | MODULE_AUTHOR("Dave Taht"); | 
 | 275 | MODULE_AUTHOR("Eric Dumazet"); | 
 | 276 | MODULE_LICENSE("Dual BSD/GPL"); |