/*
 * net/sched/sch_qfq.c         Quick Fair Queueing Scheduler.
 *
 * Copyright (c) 2009 Fabio Checconi, Luigi Rizzo, and Paolo Valente.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/pkt_sched.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>


/*  Quick Fair Queueing
    ===================

    Sources:

    Fabio Checconi, Luigi Rizzo, and Paolo Valente: "QFQ: Efficient
    Packet Scheduling with Tight Bandwidth Distribution Guarantees."

    See also:
    http://retis.sssup.it/~fabio/linux/qfq/
 */
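
/*
 * An illustrative configuration sketch (not part of the original file;
 * it assumes an iproute2 tc built with QFQ support, whose qfq class
 * parameters are "weight" and "maxpkt"):
 *
 *	tc qdisc add dev eth0 root handle 1: qfq
 *	tc class add dev eth0 parent 1: classid 1:1 qfq weight 10 maxpkt 1514
 *	tc filter add dev eth0 parent 1: protocol ip prio 1 u32 \
 *		match ip dport 80 0xffff flowid 1:1
 *
 * Each class receives bandwidth in proportion to its weight; maxpkt
 * bounds the packet length used to map the class to a group (see the
 * class<->group mapping below).
 */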

/*

  Virtual time computations.

  S, F and V are all computed in fixed point arithmetic with
  FRAC_BITS fractional bits.

  QFQ_MAX_INDEX is the maximum index allowed for a group. We need
	one bit per index.
  QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.

  The layout of the bits is as below:

                   [ MTU_SHIFT ][      FRAC_BITS    ]
                   [ MAX_INDEX    ][ MIN_SLOT_SHIFT ]
				 ^.__grp->index = 0
				 *.__grp->slot_shift

  where MIN_SLOT_SHIFT is derived by difference from the others.

  The max group index corresponds to Lmax/w_min, where
  Lmax = 1<<MTU_SHIFT and w_min = 1.
  From this, and knowing how many groups (MAX_INDEX) we want,
  we can derive the shift corresponding to each group.

  Because we often need to compute
	F = S + len/w_i  and V = V + len/wsum
  instead of storing w_i we store the value
	inv_w = (1<<FRAC_BITS)/w_i
  so that, with S, F and V kept in the same fixed point representation,
  the updates become
	F = S + len * inv_w  and  V = V + len * (ONE_FP/wsum).
  We use the precomputed inverse of the weight sum (IWSUM) in the
  formulas so we can easily move between a static and an adaptive
  weight sum.

  The per-scheduler-instance data contain all the data structures
  for the scheduler: bitmaps and bucket lists.

 */
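
/*
 * A small worked example of the fixed point arithmetic above
 * (illustrative numbers only): with FRAC_BITS = 30, a class of
 * weight w_i = 4 stores inv_w = ONE_FP/4 = 1 << 28, and dequeueing
 * a len = 1500 byte packet advances its finish time by
 *	len * inv_w = 1500 << 28 = (1500/4) << 30,
 * i.e. by exactly len/w_i in the fixed point scale, as in the
 * formula above.
 */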

/*
 * Maximum number of consecutive slots occupied by backlogged classes
 * inside a group.
 */
#define QFQ_MAX_SLOTS	32

/*
 * Shifts used for class<->group mapping.  We allow class weights that are
 * in the range [1, 2^MAX_WSHIFT], and we try to map each class i to the
 * group with the smallest index that can support the L_i / r_i configured
 * for the class.
 *
 * grp->index is the index of the group; and grp->slot_shift
 * is the shift for the corresponding (scaled) sigma_i.
 */
#define QFQ_MAX_INDEX		19
#define QFQ_MAX_WSHIFT		16

#define	QFQ_MAX_WEIGHT		(1<<QFQ_MAX_WSHIFT)
#define QFQ_MAX_WSUM		(2*QFQ_MAX_WEIGHT)

#define FRAC_BITS		30	/* fixed point arithmetic */
#define ONE_FP			(1UL << FRAC_BITS)
#define IWSUM			(ONE_FP/QFQ_MAX_WSUM)

#define QFQ_MTU_SHIFT		11
#define QFQ_MIN_SLOT_SHIFT	(FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
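
/*
 * Plugging in the values above (illustrative arithmetic, derived from
 * the defines): ONE_FP = 1 << 30, QFQ_MAX_WSUM = 1 << 17, so
 * IWSUM = 1 << 13, and QFQ_MIN_SLOT_SHIFT = 30 + 11 - 19 = 22.
 * Group i then uses slots of 1 << (22 + i) fixed point units: e.g.
 * a class with weight 1 << 16 and lmax = 256 has
 * lmax * inv_w = 256 * (1 << 14) = 1 << 22 and maps to group 0,
 * while weight 1 and lmax = 2048 give 1 << 41 and map to group 19.
 */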

/*
 * Possible group states.  These values are used as indexes for the bitmaps
 * array of struct qfq_sched.
 */
enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };

struct qfq_group;

struct qfq_class {
	struct Qdisc_class_common common;

	unsigned int refcnt;
	unsigned int filter_cnt;

	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct Qdisc *qdisc;

	struct hlist_node next;	/* Link for the slot list. */
	u64 S, F;		/* flow timestamps (exact) */

	/* group we belong to. In principle we would need the index,
	 * which is log_2(lmax/weight), but we never reference it
	 * directly, only the group.
	 */
	struct qfq_group *grp;

	/* these are copied from the flowset. */
	u32	inv_w;		/* ONE_FP/weight */
	u32	lmax;		/* Max packet size for this flow. */
};

struct qfq_group {
	u64 S, F;			/* group timestamps (approx). */
	unsigned int slot_shift;	/* Slot shift. */
	unsigned int index;		/* Group index. */
	unsigned int front;		/* Index of the front slot. */
	unsigned long full_slots;	/* non-empty slots */

	/* Array of RR lists of active classes. */
	struct hlist_head slots[QFQ_MAX_SLOTS];
};

struct qfq_sched {
	struct tcf_proto *filter_list;
	struct Qdisc_class_hash clhash;

	u64		V;		/* Precise virtual time. */
	u32		wsum;		/* weight sum */

	unsigned long bitmaps[QFQ_MAX_STATE];	    /* Group bitmaps. */
	struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */
};

static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct qfq_class, common);
}

static void qfq_purge_queue(struct qfq_class *cl)
{
	unsigned int len = cl->qdisc->q.qlen;

	qdisc_reset(cl->qdisc);
	qdisc_tree_decrease_qlen(cl->qdisc, len);
}

static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
	[TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
	[TCA_QFQ_LMAX] = { .type = NLA_U32 },
};

/*
 * Calculate a flow index, given its weight and maximum packet length.
 * index = log_2(maxlen/weight) but we need to apply the scaling.
 * This is used only once at flow creation.
 */
static int qfq_calc_index(u32 inv_w, unsigned int maxlen)
{
	u64 slot_size = (u64)maxlen * inv_w;
	unsigned long size_map;
	int index = 0;

	size_map = slot_size >> QFQ_MIN_SLOT_SHIFT;
	if (!size_map)
		goto out;

	index = __fls(size_map) + 1;	/* basically a log_2 */
	index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));

	if (index < 0)
		index = 0;
out:
	pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n",
		 (unsigned long) ONE_FP/inv_w, maxlen, index);

	return index;
}
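
/*
 * Worked example for qfq_calc_index() (numbers chosen for illustration):
 * weight = 1 gives inv_w = ONE_FP = 1 << 30; with maxlen = 2048,
 *	slot_size = 2048 * (1 << 30)    = 1 << 41
 *	size_map  = slot_size >> 22     = 1 << 19
 *	index     = __fls(size_map) + 1 = 20
 * and since slot_size is exactly 1 << (20 + 22 - 1), the correction
 * term subtracts one, yielding index 19 (= QFQ_MAX_INDEX).
 */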

static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
			    struct nlattr **tca, unsigned long *arg)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl = (struct qfq_class *)*arg;
	struct nlattr *tb[TCA_QFQ_MAX + 1];
	u32 weight, lmax, inv_w;
	int i, err;

	if (tca[TCA_OPTIONS] == NULL) {
		pr_notice("qfq: no options\n");
		return -EINVAL;
	}

	err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy);
	if (err < 0)
		return err;

	if (tb[TCA_QFQ_WEIGHT]) {
		weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
		if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) {
			pr_notice("qfq: invalid weight %u\n", weight);
			return -EINVAL;
		}
	} else
		weight = 1;

	inv_w = ONE_FP / weight;
	weight = ONE_FP / inv_w;
	if (q->wsum + weight > QFQ_MAX_WSUM) {
		pr_notice("qfq: total weight out of range (%u + %u)\n",
			  weight, q->wsum);
		return -EINVAL;
	}

	if (tb[TCA_QFQ_LMAX]) {
		lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
		if (!lmax || lmax > (1UL << QFQ_MTU_SHIFT)) {
			pr_notice("qfq: invalid max length %u\n", lmax);
			return -EINVAL;
		}
	} else
		lmax = 1UL << QFQ_MTU_SHIFT;

	if (cl != NULL) {
		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
						    qdisc_root_sleeping_lock(sch),
						    tca[TCA_RATE]);
			if (err)
				return err;
		}

		sch_tree_lock(sch);
		if (tb[TCA_QFQ_WEIGHT]) {
			/* Adjust the sum by the weight delta, keeping the
			 * contribution of all the other classes.
			 */
			q->wsum += weight - ONE_FP / cl->inv_w;
			cl->inv_w = inv_w;
		}
		sch_tree_unlock(sch);

		return 0;
	}

	cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL);
	if (cl == NULL)
		return -ENOBUFS;

	cl->refcnt = 1;
	cl->common.classid = classid;
	cl->lmax = lmax;
	cl->inv_w = inv_w;
	i = qfq_calc_index(cl->inv_w, cl->lmax);

	cl->grp = &q->groups[i];
	q->wsum += weight;

	cl->qdisc = qdisc_create_dflt(sch->dev_queue,
				      &pfifo_qdisc_ops, classid);
	if (cl->qdisc == NULL)
		cl->qdisc = &noop_qdisc;

	if (tca[TCA_RATE]) {
		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
					qdisc_root_sleeping_lock(sch),
					tca[TCA_RATE]);
		if (err) {
			qdisc_destroy(cl->qdisc);
			kfree(cl);
			return err;
		}
	}

	sch_tree_lock(sch);
	qdisc_class_hash_insert(&q->clhash, &cl->common);
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;
}

static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
{
	struct qfq_sched *q = qdisc_priv(sch);

	if (cl->inv_w) {
		q->wsum -= ONE_FP / cl->inv_w;
		cl->inv_w = 0;
	}

	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	qdisc_destroy(cl->qdisc);
	kfree(cl);
}

static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl = (struct qfq_class *)arg;

	if (cl->filter_cnt > 0)
		return -EBUSY;

	sch_tree_lock(sch);

	qfq_purge_queue(cl);
	qdisc_class_hash_remove(&q->clhash, &cl->common);

	BUG_ON(--cl->refcnt == 0);
	/*
	 * This shouldn't happen: we "hold" one cops->get() when called
	 * from tc_ctl_tclass; the destroy method is done from cops->put().
	 */

	sch_tree_unlock(sch);
	return 0;
}

static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid)
{
	struct qfq_class *cl = qfq_find_class(sch, classid);

	if (cl != NULL)
		cl->refcnt++;

	return (unsigned long)cl;
}

static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
{
	struct qfq_class *cl = (struct qfq_class *)arg;

	if (--cl->refcnt == 0)
		qfq_destroy_class(sch, cl);
}

static struct tcf_proto **qfq_tcf_chain(struct Qdisc *sch, unsigned long cl)
{
	struct qfq_sched *q = qdisc_priv(sch);

	if (cl)
		return NULL;

	return &q->filter_list;
}

static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
				  u32 classid)
{
	struct qfq_class *cl = qfq_find_class(sch, classid);

	if (cl != NULL)
		cl->filter_cnt++;

	return (unsigned long)cl;
}

static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct qfq_class *cl = (struct qfq_class *)arg;

	cl->filter_cnt--;
}

static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
			   struct Qdisc *new, struct Qdisc **old)
{
	struct qfq_class *cl = (struct qfq_class *)arg;

	if (new == NULL) {
		new = qdisc_create_dflt(sch->dev_queue,
					&pfifo_qdisc_ops, cl->common.classid);
		if (new == NULL)
			new = &noop_qdisc;
	}

	sch_tree_lock(sch);
	qfq_purge_queue(cl);
	*old = cl->qdisc;
	cl->qdisc = new;
	sch_tree_unlock(sch);
	return 0;
}

static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct qfq_class *cl = (struct qfq_class *)arg;

	return cl->qdisc;
}

static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct qfq_class *cl = (struct qfq_class *)arg;
	struct nlattr *nest;

	tcm->tcm_parent	= TC_H_ROOT;
	tcm->tcm_handle	= cl->common.classid;
	tcm->tcm_info	= cl->qdisc->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	NLA_PUT_U32(skb, TCA_QFQ_WEIGHT, ONE_FP/cl->inv_w);
	NLA_PUT_U32(skb, TCA_QFQ_LMAX, cl->lmax);
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
				struct gnet_dump *d)
{
	struct qfq_class *cl = (struct qfq_class *)arg;
	struct tc_qfq_stats xstats;

	memset(&xstats, 0, sizeof(xstats));
	cl->qdisc->qstats.qlen = cl->qdisc->q.qlen;

	xstats.weight = ONE_FP/cl->inv_w;
	xstats.lmax = cl->lmax;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}

static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl;
	struct hlist_node *n;
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl;
	struct tcf_result res;
	int result;

	if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
		pr_debug("qfq_classify: found %d\n", skb->priority);
		cl = qfq_find_class(sch, skb->priority);
		if (cl != NULL)
			return cl;
	}

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tc_classify(skb, q->filter_list, &res);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		cl = (struct qfq_class *)res.class;
		if (cl == NULL)
			cl = qfq_find_class(sch, res.classid);
		return cl;
	}

	return NULL;
}

/* Generic comparison function, handling wraparound. */
static inline int qfq_gt(u64 a, u64 b)
{
	return (s64)(a - b) > 0;
}
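
/*
 * Example: comparisons stay correct across u64 wraparound because only
 * the signed difference is tested. With a = 1 and b = ~0ULL (i.e. b is
 * one tick before the wrap), a - b == 2, so qfq_gt(1, ~0ULL) is true
 * even though a < b as plain unsigned values.
 */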

/* Round a precise timestamp to its slotted value. */
static inline u64 qfq_round_down(u64 ts, unsigned int shift)
{
	return ts & ~((1ULL << shift) - 1);
}

/* return the pointer to the group with lowest index in the bitmap */
static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
					unsigned long bitmap)
{
	int index = __ffs(bitmap);
	return &q->groups[index];
}

/* Calculate a mask to mimic what would be ffs_from(). */
static inline unsigned long mask_from(unsigned long bitmap, int from)
{
	return bitmap & ~((1UL << from) - 1);
}
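
/*
 * Example: mask_from(0x16, 2) clears the two low bits of 0x16
 * (binary 10110 -> 10100), so a subsequent __ffs() on the result
 * behaves like an "ffs starting from bit 2".
 */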

/*
 * The state computation relies on ER=0, IR=1, EB=2, IB=3.
 * First compute eligibility comparing grp->S and q->V,
 * then check if someone is blocking us and possibly add EB.
 */
static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp)
{
	/* if S > V we are not eligible */
	unsigned int state = qfq_gt(grp->S, q->V);
	unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
	struct qfq_group *next;

	if (mask) {
		next = qfq_ffs(q, mask);
		if (qfq_gt(grp->F, next->F))
			state |= EB;
	}

	return state;
}
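
/*
 * Example of the state encoding used above: with ER=0, IR=1, EB=2,
 * IB=3, bit 0 of the state means "ineligible" (S > V) and bit 1 means
 * "blocked" (a lower-index eligible group has a smaller F), so an
 * ineligible and blocked group gets state 1 | 2 == IB.
 */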


/*
 * In principle
 *	q->bitmaps[dst] |= q->bitmaps[src] & mask;
 *	q->bitmaps[src] &= ~mask;
 * but we should make sure that src != dst
 */
static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask,
				   int src, int dst)
{
	q->bitmaps[dst] |= q->bitmaps[src] & mask;
	q->bitmaps[src] &= ~mask;
}

static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F)
{
	unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
	struct qfq_group *next;

	if (mask) {
		next = qfq_ffs(q, mask);
		if (!qfq_gt(next->F, old_F))
			return;
	}

	mask = (1UL << index) - 1;
	qfq_move_groups(q, mask, EB, ER);
	qfq_move_groups(q, mask, IB, IR);
}

/*
 * perhaps
 *
	old_V ^= q->V;
	old_V >>= QFQ_MIN_SLOT_SHIFT;
	if (old_V) {
		...
	}
 *
 */
static void qfq_make_eligible(struct qfq_sched *q, u64 old_V)
{
	unsigned long vslot = q->V >> QFQ_MIN_SLOT_SHIFT;
	unsigned long old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT;

	if (vslot != old_vslot) {
		unsigned long mask = (1UL << fls(vslot ^ old_vslot)) - 1;
		qfq_move_groups(q, mask, IR, ER);
		qfq_move_groups(q, mask, IB, EB);
	}
}
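
/*
 * Example: if old_V >> QFQ_MIN_SLOT_SHIFT == 0 and the new
 * V >> QFQ_MIN_SLOT_SHIFT == 1, then vslot ^ old_vslot == 1,
 * fls() == 1 and mask == 1, so only group 0 may move from the
 * ineligible sets (IR, IB) to the eligible ones (ER, EB).
 */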


/*
 * XXX we should make sure that slot becomes less than 32.
 * This is guaranteed by the input values.
 * roundedS is always cl->S rounded on grp->slot_shift bits.
 */
static void qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl,
			    u64 roundedS)
{
	u64 slot = (roundedS - grp->S) >> grp->slot_shift;
	unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS;

	hlist_add_head(&cl->next, &grp->slots[i]);
	__set_bit(slot, &grp->full_slots);
}
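
/*
 * Example: with grp->S = 0, slot_shift = 22 and front = 30, a class
 * whose roundedS = 3 << 22 has logical slot 3 and goes to bucket
 * (30 + 3) % QFQ_MAX_SLOTS = 1: the bucket list is a circular buffer
 * and "front" anchors logical slot 0.
 */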

/* Maybe introduce hlist_first_entry?? */
static struct qfq_class *qfq_slot_head(struct qfq_group *grp)
{
	return hlist_entry(grp->slots[grp->front].first,
			   struct qfq_class, next);
}

/*
 * remove the entry from the slot
 */
static void qfq_front_slot_remove(struct qfq_group *grp)
{
	struct qfq_class *cl = qfq_slot_head(grp);

	BUG_ON(!cl);
	hlist_del(&cl->next);
	if (hlist_empty(&grp->slots[grp->front]))
		__clear_bit(0, &grp->full_slots);
}

/*
 * Returns the first full queue in a group. As a side effect,
 * adjust the bucket list so the first non-empty bucket is at
 * position 0 in full_slots.
 */
static struct qfq_class *qfq_slot_scan(struct qfq_group *grp)
{
	unsigned int i;

	pr_debug("qfq slot_scan: grp %u full %#lx\n",
		 grp->index, grp->full_slots);

	if (grp->full_slots == 0)
		return NULL;

	i = __ffs(grp->full_slots);  /* zero based */
	if (i > 0) {
		grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
		grp->full_slots >>= i;
	}

	return qfq_slot_head(grp);
}

/*
 * adjust the bucket list. When the start time of a group decreases,
 * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to
 * move the objects. The mask of occupied slots must be shifted
 * because we use ffs() to find the first non-empty slot.
 * This covers decreases in the group's start time, but what about
 * increases of the start time?
 * Here too we should make sure that i is less than 32.
 */
static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS)
{
	unsigned int i = (grp->S - roundedS) >> grp->slot_shift;

	grp->full_slots <<= i;
	grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
}
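
/*
 * Example: if grp->S = 5 << slot_shift and the new head class has
 * roundedS = 3 << slot_shift, then i = 2: full_slots is shifted left
 * by two so the old logical slot 0 becomes slot 2, and front moves
 * back by two positions. The modulo is safe even when (front - i)
 * wraps, because front is unsigned and QFQ_MAX_SLOTS is a power
 * of two.
 */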

static void qfq_update_eligible(struct qfq_sched *q, u64 old_V)
{
	struct qfq_group *grp;
	unsigned long ineligible;

	ineligible = q->bitmaps[IR] | q->bitmaps[IB];
	if (ineligible) {
		if (!q->bitmaps[ER]) {
			grp = qfq_ffs(q, ineligible);
			if (qfq_gt(grp->S, q->V))
				q->V = grp->S;
		}
		qfq_make_eligible(q, old_V);
	}
}

/* Length of the next packet in queue (0 if the queue is empty). */
static unsigned int qdisc_peek_len(struct Qdisc *sch)
{
	struct sk_buff *skb;

	skb = sch->ops->peek(sch);
	return skb ? qdisc_pkt_len(skb) : 0;
}

/*
 * Updates the class, returns true if also the group needs to be updated.
 */
static bool qfq_update_class(struct qfq_group *grp, struct qfq_class *cl)
{
	unsigned int len = qdisc_peek_len(cl->qdisc);

	cl->S = cl->F;
	if (!len)
		qfq_front_slot_remove(grp);	/* queue is empty */
	else {
		u64 roundedS;

		cl->F = cl->S + (u64)len * cl->inv_w;
		roundedS = qfq_round_down(cl->S, grp->slot_shift);
		if (roundedS == grp->S)
			return false;

		qfq_front_slot_remove(grp);
		qfq_slot_insert(grp, cl, roundedS);
	}

	return true;
}

static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_group *grp;
	struct qfq_class *cl;
	struct sk_buff *skb;
	unsigned int len;
	u64 old_V;

	if (!q->bitmaps[ER])
		return NULL;

	grp = qfq_ffs(q, q->bitmaps[ER]);

	cl = qfq_slot_head(grp);
	skb = qdisc_dequeue_peeked(cl->qdisc);
	if (!skb) {
		WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
		return NULL;
	}

	sch->q.qlen--;
	qdisc_bstats_update(sch, skb);

	old_V = q->V;
	len = qdisc_pkt_len(skb);
	q->V += (u64)len * IWSUM;
	pr_debug("qfq dequeue: len %u F %lld now %lld\n",
		 len, (unsigned long long) cl->F, (unsigned long long) q->V);

	if (qfq_update_class(grp, cl)) {
		u64 old_F = grp->F;

		cl = qfq_slot_scan(grp);
		if (!cl)
			__clear_bit(grp->index, &q->bitmaps[ER]);
		else {
			u64 roundedS = qfq_round_down(cl->S, grp->slot_shift);
			unsigned int s;

			if (grp->S == roundedS)
				goto skip_unblock;
			grp->S = roundedS;
			grp->F = roundedS + (2ULL << grp->slot_shift);
			__clear_bit(grp->index, &q->bitmaps[ER]);
			s = qfq_calc_state(q, grp);
			__set_bit(grp->index, &q->bitmaps[s]);
		}

		qfq_unblock_groups(q, grp->index, old_F);
	}

skip_unblock:
	qfq_update_eligible(q, old_V);

	return skb;
}

/*
 * Assign a reasonable start time for a new flow k in group i.
 * Admissible values for \hat{F} are multiples of \sigma_i
 * no greater than V + \sigma_i. Larger values mean that
 * we had a wraparound so we consider the timestamp to be stale.
 *
 * If F is not stale and F >= V then we set S = F.
 * Otherwise we should assign S = V, but this may violate
 * the ordering in ER. So, if we have groups in ER, set S to
 * the F_j of the first group j which would be blocking us.
 * We are guaranteed not to move S backward because
 * otherwise our group i would still be blocked.
 */
static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
{
	unsigned long mask;
	uint32_t limit, roundedF;
	int slot_shift = cl->grp->slot_shift;

	roundedF = qfq_round_down(cl->F, slot_shift);
	limit = qfq_round_down(q->V, slot_shift) + (1UL << slot_shift);

	if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) {
		/* timestamp was stale */
		mask = mask_from(q->bitmaps[ER], cl->grp->index);
		if (mask) {
			struct qfq_group *next = qfq_ffs(q, mask);
			if (qfq_gt(roundedF, next->F)) {
				cl->S = next->F;
				return;
			}
		}
		cl->S = q->V;
	} else  /* timestamp is not stale */
		cl->S = cl->F;
}

static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_group *grp;
	struct qfq_class *cl;
	int err;
	u64 roundedS;
	int s;

	cl = qfq_classify(skb, sch, &err);
	if (cl == NULL) {
		if (err & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return err;
	}
	pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);

	err = qdisc_enqueue(skb, cl->qdisc);
	if (unlikely(err != NET_XMIT_SUCCESS)) {
		pr_debug("qfq_enqueue: enqueue failed %d\n", err);
		if (net_xmit_drop_count(err)) {
			cl->qstats.drops++;
			sch->qstats.drops++;
		}
		return err;
	}

	bstats_update(&cl->bstats, skb);
	++sch->q.qlen;

	/* If the new skb is not the head of the queue, we are done here. */
	if (cl->qdisc->q.qlen != 1)
		return err;

	/* If we reach this point, the class queue was idle. */
	grp = cl->grp;
	qfq_update_start(q, cl);

	/* compute new finish time and rounded start. */
	cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w;
	roundedS = qfq_round_down(cl->S, grp->slot_shift);

	/*
	 * insert cl in the correct bucket.
	 * If cl->S >= grp->S we don't need to adjust the
	 * bucket list and simply go to the insertion phase.
	 * Otherwise grp->S is decreasing, we must make room
	 * in the bucket list, and also recompute the group state.
	 * Finally, if there were no flows in this group and nobody
	 * was in ER make sure to adjust V.
	 */
	if (grp->full_slots) {
		if (!qfq_gt(grp->S, cl->S))
			goto skip_update;

		/* create a slot for this cl->S */
		qfq_slot_rotate(grp, roundedS);
		/* group was surely ineligible, remove */
		__clear_bit(grp->index, &q->bitmaps[IR]);
		__clear_bit(grp->index, &q->bitmaps[IB]);
	} else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V))
		q->V = roundedS;

	grp->S = roundedS;
	grp->F = roundedS + (2ULL << grp->slot_shift);
	s = qfq_calc_state(q, grp);
	__set_bit(grp->index, &q->bitmaps[s]);

	pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n",
		 s, q->bitmaps[s],
		 (unsigned long long) cl->S,
		 (unsigned long long) cl->F,
		 (unsigned long long) q->V);

skip_update:
	qfq_slot_insert(grp, cl, roundedS);

	return err;
}


static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
			    struct qfq_class *cl)
{
	unsigned int i, offset;
	u64 roundedS;

	roundedS = qfq_round_down(cl->S, grp->slot_shift);
	offset = (roundedS - grp->S) >> grp->slot_shift;
	i = (grp->front + offset) % QFQ_MAX_SLOTS;

	hlist_del(&cl->next);
	if (hlist_empty(&grp->slots[i]))
		__clear_bit(offset, &grp->full_slots);
}

/*
 * called to forcibly destroy a queue.
 * If the queue is not in the front bucket, or if it has
 * other queues in the front bucket, we can simply remove
 * the queue with no other side effects.
 * Otherwise we must propagate the event up.
 */
static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
{
	struct qfq_group *grp = cl->grp;
	unsigned long mask;
	u64 roundedS;
	int s;

	cl->F = cl->S;
	qfq_slot_remove(q, grp, cl);

	if (!grp->full_slots) {
		__clear_bit(grp->index, &q->bitmaps[IR]);
		__clear_bit(grp->index, &q->bitmaps[EB]);
		__clear_bit(grp->index, &q->bitmaps[IB]);

		if (test_bit(grp->index, &q->bitmaps[ER]) &&
		    !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
			mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
			if (mask)
				mask = ~((1UL << __fls(mask)) - 1);
			else
				mask = ~0UL;
			qfq_move_groups(q, mask, EB, ER);
			qfq_move_groups(q, mask, IB, IR);
		}
		__clear_bit(grp->index, &q->bitmaps[ER]);
	} else if (hlist_empty(&grp->slots[grp->front])) {
		cl = qfq_slot_scan(grp);
		roundedS = qfq_round_down(cl->S, grp->slot_shift);
		if (grp->S != roundedS) {
			__clear_bit(grp->index, &q->bitmaps[ER]);
			__clear_bit(grp->index, &q->bitmaps[IR]);
			__clear_bit(grp->index, &q->bitmaps[EB]);
			__clear_bit(grp->index, &q->bitmaps[IB]);
			grp->S = roundedS;
			grp->F = roundedS + (2ULL << grp->slot_shift);
			s = qfq_calc_state(q, grp);
			__set_bit(grp->index, &q->bitmaps[s]);
		}
	}

	qfq_update_eligible(q, q->V);
}

static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl = (struct qfq_class *)arg;

	if (cl->qdisc->q.qlen == 0)
		qfq_deactivate_class(q, cl);
}

static unsigned int qfq_drop(struct Qdisc *sch)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_group *grp;
	unsigned int i, j, len;

	for (i = 0; i <= QFQ_MAX_INDEX; i++) {
		grp = &q->groups[i];
		for (j = 0; j < QFQ_MAX_SLOTS; j++) {
			struct qfq_class *cl;
			struct hlist_node *n;

			hlist_for_each_entry(cl, n, &grp->slots[j], next) {

				if (!cl->qdisc->ops->drop)
					continue;

				len = cl->qdisc->ops->drop(cl->qdisc);
				if (len > 0) {
					sch->q.qlen--;
					if (!cl->qdisc->q.qlen)
						qfq_deactivate_class(q, cl);

					return len;
				}
			}
		}
	}

	return 0;
}

static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_group *grp;
	int i, j, err;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		return err;

	for (i = 0; i <= QFQ_MAX_INDEX; i++) {
		grp = &q->groups[i];
		grp->index = i;
		grp->slot_shift = QFQ_MTU_SHIFT + FRAC_BITS
				   - (QFQ_MAX_INDEX - i);
		for (j = 0; j < QFQ_MAX_SLOTS; j++)
			INIT_HLIST_HEAD(&grp->slots[j]);
	}

	return 0;
}
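
/*
 * Resulting shifts (illustrative): group 0 gets slot_shift
 * 11 + 30 - 19 = 22 (= QFQ_MIN_SLOT_SHIFT) and each following group
 * one bit more, up to slot_shift 41 for group QFQ_MAX_INDEX = 19,
 * matching the bit layout described at the top of this file.
 */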

static void qfq_reset_qdisc(struct Qdisc *sch)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_group *grp;
	struct qfq_class *cl;
	struct hlist_node *n, *tmp;
	unsigned int i, j;

	for (i = 0; i <= QFQ_MAX_INDEX; i++) {
		grp = &q->groups[i];
		for (j = 0; j < QFQ_MAX_SLOTS; j++) {
			hlist_for_each_entry_safe(cl, n, tmp,
						  &grp->slots[j], next) {
				qfq_deactivate_class(q, cl);
			}
		}
	}

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
			qdisc_reset(cl->qdisc);
	}
	sch->q.qlen = 0;
}

static void qfq_destroy_qdisc(struct Qdisc *sch)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl;
	struct hlist_node *n, *next;
	unsigned int i;

	tcf_destroy_chain(&q->filter_list);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
					  common.hnode) {
			qfq_destroy_class(sch, cl);
		}
	}
	qdisc_class_hash_destroy(&q->clhash);
}

static const struct Qdisc_class_ops qfq_class_ops = {
	.change		= qfq_change_class,
	.delete		= qfq_delete_class,
	.get		= qfq_get_class,
	.put		= qfq_put_class,
	.tcf_chain	= qfq_tcf_chain,
	.bind_tcf	= qfq_bind_tcf,
	.unbind_tcf	= qfq_unbind_tcf,
	.graft		= qfq_graft_class,
	.leaf		= qfq_class_leaf,
	.qlen_notify	= qfq_qlen_notify,
	.dump		= qfq_dump_class,
	.dump_stats	= qfq_dump_class_stats,
	.walk		= qfq_walk,
};

static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
	.cl_ops		= &qfq_class_ops,
	.id		= "qfq",
	.priv_size	= sizeof(struct qfq_sched),
	.enqueue	= qfq_enqueue,
	.dequeue	= qfq_dequeue,
	.peek		= qdisc_peek_dequeued,
	.drop		= qfq_drop,
	.init		= qfq_init_qdisc,
	.reset		= qfq_reset_qdisc,
	.destroy	= qfq_destroy_qdisc,
	.owner		= THIS_MODULE,
};

static int __init qfq_init(void)
{
	return register_qdisc(&qfq_qdisc_ops);
}

static void __exit qfq_exit(void)
{
	unregister_qdisc(&qfq_qdisc_ops);
}

module_init(qfq_init);
module_exit(qfq_exit);
MODULE_LICENSE("GPL");