/*
 * net/sched/sch_mqprio.c
 *
 * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 */
|  | 10 |  | 
|  | 11 | #include <linux/types.h> | 
|  | 12 | #include <linux/slab.h> | 
|  | 13 | #include <linux/kernel.h> | 
|  | 14 | #include <linux/string.h> | 
|  | 15 | #include <linux/errno.h> | 
|  | 16 | #include <linux/skbuff.h> | 
|  | 17 | #include <net/netlink.h> | 
|  | 18 | #include <net/pkt_sched.h> | 
|  | 19 | #include <net/sch_generic.h> | 
|  | 20 |  | 
/**
 * struct mqprio_sched - private state of an mqprio root qdisc
 * @qdiscs: array with one pre-allocated child qdisc per device tx queue;
 *	allocated by mqprio_init(), freed and reset to NULL by mqprio_attach()
 * @hw_owned: set to 1 by mqprio_init() when the options asked for the
 *	traffic class setup to be done by the driver via ndo_setup_tc
 */
struct mqprio_sched {
	struct Qdisc **qdiscs;
	int hw_owned;
};
|  | 25 |  | 
|  | 26 | static void mqprio_destroy(struct Qdisc *sch) | 
|  | 27 | { | 
|  | 28 | struct net_device *dev = qdisc_dev(sch); | 
|  | 29 | struct mqprio_sched *priv = qdisc_priv(sch); | 
|  | 30 | unsigned int ntx; | 
|  | 31 |  | 
| Ben Hutchings | ac7100b | 2011-02-14 19:02:23 +0000 | [diff] [blame] | 32 | if (priv->qdiscs) { | 
|  | 33 | for (ntx = 0; | 
|  | 34 | ntx < dev->num_tx_queues && priv->qdiscs[ntx]; | 
|  | 35 | ntx++) | 
|  | 36 | qdisc_destroy(priv->qdiscs[ntx]); | 
|  | 37 | kfree(priv->qdiscs); | 
|  | 38 | } | 
| John Fastabend | b8970f0 | 2011-01-17 08:06:09 +0000 | [diff] [blame] | 39 |  | 
|  | 40 | if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) | 
|  | 41 | dev->netdev_ops->ndo_setup_tc(dev, 0); | 
|  | 42 | else | 
|  | 43 | netdev_set_num_tc(dev, 0); | 
| John Fastabend | b8970f0 | 2011-01-17 08:06:09 +0000 | [diff] [blame] | 44 | } | 
|  | 45 |  | 
|  | 46 | static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) | 
|  | 47 | { | 
|  | 48 | int i, j; | 
|  | 49 |  | 
|  | 50 | /* Verify num_tc is not out of max range */ | 
|  | 51 | if (qopt->num_tc > TC_MAX_QUEUE) | 
|  | 52 | return -EINVAL; | 
|  | 53 |  | 
|  | 54 | /* Verify priority mapping uses valid tcs */ | 
|  | 55 | for (i = 0; i < TC_BITMASK + 1; i++) { | 
|  | 56 | if (qopt->prio_tc_map[i] >= qopt->num_tc) | 
|  | 57 | return -EINVAL; | 
|  | 58 | } | 
|  | 59 |  | 
|  | 60 | /* net_device does not support requested operation */ | 
|  | 61 | if (qopt->hw && !dev->netdev_ops->ndo_setup_tc) | 
|  | 62 | return -EINVAL; | 
|  | 63 |  | 
|  | 64 | /* if hw owned qcount and qoffset are taken from LLD so | 
|  | 65 | * no reason to verify them here | 
|  | 66 | */ | 
|  | 67 | if (qopt->hw) | 
|  | 68 | return 0; | 
|  | 69 |  | 
|  | 70 | for (i = 0; i < qopt->num_tc; i++) { | 
|  | 71 | unsigned int last = qopt->offset[i] + qopt->count[i]; | 
|  | 72 |  | 
|  | 73 | /* Verify the queue count is in tx range being equal to the | 
|  | 74 | * real_num_tx_queues indicates the last queue is in use. | 
|  | 75 | */ | 
|  | 76 | if (qopt->offset[i] >= dev->real_num_tx_queues || | 
|  | 77 | !qopt->count[i] || | 
|  | 78 | last > dev->real_num_tx_queues) | 
|  | 79 | return -EINVAL; | 
|  | 80 |  | 
|  | 81 | /* Verify that the offset and counts do not overlap */ | 
|  | 82 | for (j = i + 1; j < qopt->num_tc; j++) { | 
|  | 83 | if (last > qopt->offset[j]) | 
|  | 84 | return -EINVAL; | 
|  | 85 | } | 
|  | 86 | } | 
|  | 87 |  | 
|  | 88 | return 0; | 
|  | 89 | } | 
|  | 90 |  | 
|  | 91 | static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) | 
|  | 92 | { | 
|  | 93 | struct net_device *dev = qdisc_dev(sch); | 
|  | 94 | struct mqprio_sched *priv = qdisc_priv(sch); | 
|  | 95 | struct netdev_queue *dev_queue; | 
|  | 96 | struct Qdisc *qdisc; | 
|  | 97 | int i, err = -EOPNOTSUPP; | 
|  | 98 | struct tc_mqprio_qopt *qopt = NULL; | 
|  | 99 |  | 
|  | 100 | BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); | 
|  | 101 | BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK); | 
|  | 102 |  | 
|  | 103 | if (sch->parent != TC_H_ROOT) | 
|  | 104 | return -EOPNOTSUPP; | 
|  | 105 |  | 
|  | 106 | if (!netif_is_multiqueue(dev)) | 
|  | 107 | return -EOPNOTSUPP; | 
|  | 108 |  | 
|  | 109 | if (nla_len(opt) < sizeof(*qopt)) | 
|  | 110 | return -EINVAL; | 
|  | 111 |  | 
|  | 112 | qopt = nla_data(opt); | 
|  | 113 | if (mqprio_parse_opt(dev, qopt)) | 
|  | 114 | return -EINVAL; | 
|  | 115 |  | 
|  | 116 | /* pre-allocate qdisc, attachment can't fail */ | 
|  | 117 | priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), | 
|  | 118 | GFP_KERNEL); | 
|  | 119 | if (priv->qdiscs == NULL) { | 
|  | 120 | err = -ENOMEM; | 
|  | 121 | goto err; | 
|  | 122 | } | 
|  | 123 |  | 
|  | 124 | for (i = 0; i < dev->num_tx_queues; i++) { | 
|  | 125 | dev_queue = netdev_get_tx_queue(dev, i); | 
|  | 126 | qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, | 
|  | 127 | TC_H_MAKE(TC_H_MAJ(sch->handle), | 
|  | 128 | TC_H_MIN(i + 1))); | 
|  | 129 | if (qdisc == NULL) { | 
|  | 130 | err = -ENOMEM; | 
|  | 131 | goto err; | 
|  | 132 | } | 
| John Fastabend | b8970f0 | 2011-01-17 08:06:09 +0000 | [diff] [blame] | 133 | priv->qdiscs[i] = qdisc; | 
|  | 134 | } | 
|  | 135 |  | 
|  | 136 | /* If the mqprio options indicate that hardware should own | 
|  | 137 | * the queue mapping then run ndo_setup_tc otherwise use the | 
|  | 138 | * supplied and verified mapping | 
|  | 139 | */ | 
|  | 140 | if (qopt->hw) { | 
|  | 141 | priv->hw_owned = 1; | 
|  | 142 | err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc); | 
|  | 143 | if (err) | 
|  | 144 | goto err; | 
|  | 145 | } else { | 
|  | 146 | netdev_set_num_tc(dev, qopt->num_tc); | 
|  | 147 | for (i = 0; i < qopt->num_tc; i++) | 
|  | 148 | netdev_set_tc_queue(dev, i, | 
|  | 149 | qopt->count[i], qopt->offset[i]); | 
|  | 150 | } | 
|  | 151 |  | 
|  | 152 | /* Always use supplied priority mappings */ | 
|  | 153 | for (i = 0; i < TC_BITMASK + 1; i++) | 
|  | 154 | netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]); | 
|  | 155 |  | 
|  | 156 | sch->flags |= TCQ_F_MQROOT; | 
|  | 157 | return 0; | 
|  | 158 |  | 
|  | 159 | err: | 
|  | 160 | mqprio_destroy(sch); | 
|  | 161 | return err; | 
|  | 162 | } | 
|  | 163 |  | 
|  | 164 | static void mqprio_attach(struct Qdisc *sch) | 
|  | 165 | { | 
|  | 166 | struct net_device *dev = qdisc_dev(sch); | 
|  | 167 | struct mqprio_sched *priv = qdisc_priv(sch); | 
|  | 168 | struct Qdisc *qdisc; | 
|  | 169 | unsigned int ntx; | 
|  | 170 |  | 
|  | 171 | /* Attach underlying qdisc */ | 
|  | 172 | for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { | 
|  | 173 | qdisc = priv->qdiscs[ntx]; | 
|  | 174 | qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc); | 
|  | 175 | if (qdisc) | 
|  | 176 | qdisc_destroy(qdisc); | 
|  | 177 | } | 
|  | 178 | kfree(priv->qdiscs); | 
|  | 179 | priv->qdiscs = NULL; | 
|  | 180 | } | 
|  | 181 |  | 
|  | 182 | static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch, | 
|  | 183 | unsigned long cl) | 
|  | 184 | { | 
|  | 185 | struct net_device *dev = qdisc_dev(sch); | 
|  | 186 | unsigned long ntx = cl - 1 - netdev_get_num_tc(dev); | 
|  | 187 |  | 
|  | 188 | if (ntx >= dev->num_tx_queues) | 
|  | 189 | return NULL; | 
|  | 190 | return netdev_get_tx_queue(dev, ntx); | 
|  | 191 | } | 
|  | 192 |  | 
|  | 193 | static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, | 
|  | 194 | struct Qdisc **old) | 
|  | 195 | { | 
|  | 196 | struct net_device *dev = qdisc_dev(sch); | 
|  | 197 | struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); | 
|  | 198 |  | 
|  | 199 | if (!dev_queue) | 
|  | 200 | return -EINVAL; | 
|  | 201 |  | 
|  | 202 | if (dev->flags & IFF_UP) | 
|  | 203 | dev_deactivate(dev); | 
|  | 204 |  | 
|  | 205 | *old = dev_graft_qdisc(dev_queue, new); | 
|  | 206 |  | 
|  | 207 | if (dev->flags & IFF_UP) | 
|  | 208 | dev_activate(dev); | 
|  | 209 |  | 
|  | 210 | return 0; | 
|  | 211 | } | 
|  | 212 |  | 
|  | 213 | static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) | 
|  | 214 | { | 
|  | 215 | struct net_device *dev = qdisc_dev(sch); | 
|  | 216 | struct mqprio_sched *priv = qdisc_priv(sch); | 
|  | 217 | unsigned char *b = skb_tail_pointer(skb); | 
| Eric Dumazet | 144ce87 | 2011-01-26 07:21:57 +0000 | [diff] [blame] | 218 | struct tc_mqprio_qopt opt = { 0 }; | 
| John Fastabend | b8970f0 | 2011-01-17 08:06:09 +0000 | [diff] [blame] | 219 | struct Qdisc *qdisc; | 
|  | 220 | unsigned int i; | 
|  | 221 |  | 
|  | 222 | sch->q.qlen = 0; | 
|  | 223 | memset(&sch->bstats, 0, sizeof(sch->bstats)); | 
|  | 224 | memset(&sch->qstats, 0, sizeof(sch->qstats)); | 
|  | 225 |  | 
|  | 226 | for (i = 0; i < dev->num_tx_queues; i++) { | 
|  | 227 | qdisc = netdev_get_tx_queue(dev, i)->qdisc; | 
|  | 228 | spin_lock_bh(qdisc_lock(qdisc)); | 
|  | 229 | sch->q.qlen		+= qdisc->q.qlen; | 
|  | 230 | sch->bstats.bytes	+= qdisc->bstats.bytes; | 
|  | 231 | sch->bstats.packets	+= qdisc->bstats.packets; | 
|  | 232 | sch->qstats.qlen	+= qdisc->qstats.qlen; | 
|  | 233 | sch->qstats.backlog	+= qdisc->qstats.backlog; | 
|  | 234 | sch->qstats.drops	+= qdisc->qstats.drops; | 
|  | 235 | sch->qstats.requeues	+= qdisc->qstats.requeues; | 
|  | 236 | sch->qstats.overlimits	+= qdisc->qstats.overlimits; | 
|  | 237 | spin_unlock_bh(qdisc_lock(qdisc)); | 
|  | 238 | } | 
|  | 239 |  | 
|  | 240 | opt.num_tc = netdev_get_num_tc(dev); | 
|  | 241 | memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); | 
|  | 242 | opt.hw = priv->hw_owned; | 
|  | 243 |  | 
|  | 244 | for (i = 0; i < netdev_get_num_tc(dev); i++) { | 
|  | 245 | opt.count[i] = dev->tc_to_txq[i].count; | 
|  | 246 | opt.offset[i] = dev->tc_to_txq[i].offset; | 
|  | 247 | } | 
|  | 248 |  | 
|  | 249 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | 
|  | 250 |  | 
|  | 251 | return skb->len; | 
|  | 252 | nla_put_failure: | 
|  | 253 | nlmsg_trim(skb, b); | 
|  | 254 | return -1; | 
|  | 255 | } | 
|  | 256 |  | 
|  | 257 | static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl) | 
|  | 258 | { | 
|  | 259 | struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); | 
|  | 260 |  | 
|  | 261 | if (!dev_queue) | 
|  | 262 | return NULL; | 
|  | 263 |  | 
|  | 264 | return dev_queue->qdisc_sleeping; | 
|  | 265 | } | 
|  | 266 |  | 
|  | 267 | static unsigned long mqprio_get(struct Qdisc *sch, u32 classid) | 
|  | 268 | { | 
|  | 269 | struct net_device *dev = qdisc_dev(sch); | 
|  | 270 | unsigned int ntx = TC_H_MIN(classid); | 
|  | 271 |  | 
|  | 272 | if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev)) | 
|  | 273 | return 0; | 
|  | 274 | return ntx; | 
|  | 275 | } | 
|  | 276 |  | 
/*
 * mqprio_put - counterpart of mqprio_get()
 * @sch: the mqprio root qdisc (unused)
 * @cl: class id (unused)
 *
 * Intentionally empty: mqprio_get() only computes a number and takes no
 * reference, so there is nothing to drop here.
 */
static void mqprio_put(struct Qdisc *sch, unsigned long cl)
{
}
|  | 280 |  | 
|  | 281 | static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, | 
|  | 282 | struct sk_buff *skb, struct tcmsg *tcm) | 
|  | 283 | { | 
|  | 284 | struct net_device *dev = qdisc_dev(sch); | 
|  | 285 |  | 
|  | 286 | if (cl <= netdev_get_num_tc(dev)) { | 
|  | 287 | tcm->tcm_parent = TC_H_ROOT; | 
|  | 288 | tcm->tcm_info = 0; | 
|  | 289 | } else { | 
|  | 290 | int i; | 
|  | 291 | struct netdev_queue *dev_queue; | 
|  | 292 |  | 
|  | 293 | dev_queue = mqprio_queue_get(sch, cl); | 
|  | 294 | tcm->tcm_parent = 0; | 
|  | 295 | for (i = 0; i < netdev_get_num_tc(dev); i++) { | 
|  | 296 | struct netdev_tc_txq tc = dev->tc_to_txq[i]; | 
|  | 297 | int q_idx = cl - netdev_get_num_tc(dev); | 
|  | 298 |  | 
|  | 299 | if (q_idx > tc.offset && | 
|  | 300 | q_idx <= tc.offset + tc.count) { | 
|  | 301 | tcm->tcm_parent = | 
|  | 302 | TC_H_MAKE(TC_H_MAJ(sch->handle), | 
|  | 303 | TC_H_MIN(i + 1)); | 
|  | 304 | break; | 
|  | 305 | } | 
|  | 306 | } | 
|  | 307 | tcm->tcm_info = dev_queue->qdisc_sleeping->handle; | 
|  | 308 | } | 
|  | 309 | tcm->tcm_handle |= TC_H_MIN(cl); | 
|  | 310 | return 0; | 
|  | 311 | } | 
|  | 312 |  | 
|  | 313 | static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, | 
| stephen hemminger | ea18fd9 | 2011-02-23 09:06:51 +0000 | [diff] [blame] | 314 | struct gnet_dump *d) | 
|  | 315 | __releases(d->lock) | 
|  | 316 | __acquires(d->lock) | 
| John Fastabend | b8970f0 | 2011-01-17 08:06:09 +0000 | [diff] [blame] | 317 | { | 
|  | 318 | struct net_device *dev = qdisc_dev(sch); | 
|  | 319 |  | 
|  | 320 | if (cl <= netdev_get_num_tc(dev)) { | 
|  | 321 | int i; | 
|  | 322 | struct Qdisc *qdisc; | 
|  | 323 | struct gnet_stats_queue qstats = {0}; | 
|  | 324 | struct gnet_stats_basic_packed bstats = {0}; | 
|  | 325 | struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1]; | 
|  | 326 |  | 
|  | 327 | /* Drop lock here it will be reclaimed before touching | 
|  | 328 | * statistics this is required because the d->lock we | 
|  | 329 | * hold here is the look on dev_queue->qdisc_sleeping | 
|  | 330 | * also acquired below. | 
|  | 331 | */ | 
|  | 332 | spin_unlock_bh(d->lock); | 
|  | 333 |  | 
|  | 334 | for (i = tc.offset; i < tc.offset + tc.count; i++) { | 
|  | 335 | qdisc = netdev_get_tx_queue(dev, i)->qdisc; | 
|  | 336 | spin_lock_bh(qdisc_lock(qdisc)); | 
|  | 337 | bstats.bytes      += qdisc->bstats.bytes; | 
|  | 338 | bstats.packets    += qdisc->bstats.packets; | 
|  | 339 | qstats.qlen       += qdisc->qstats.qlen; | 
|  | 340 | qstats.backlog    += qdisc->qstats.backlog; | 
|  | 341 | qstats.drops      += qdisc->qstats.drops; | 
|  | 342 | qstats.requeues   += qdisc->qstats.requeues; | 
|  | 343 | qstats.overlimits += qdisc->qstats.overlimits; | 
|  | 344 | spin_unlock_bh(qdisc_lock(qdisc)); | 
|  | 345 | } | 
|  | 346 | /* Reclaim root sleeping lock before completing stats */ | 
|  | 347 | spin_lock_bh(d->lock); | 
|  | 348 | if (gnet_stats_copy_basic(d, &bstats) < 0 || | 
|  | 349 | gnet_stats_copy_queue(d, &qstats) < 0) | 
|  | 350 | return -1; | 
|  | 351 | } else { | 
|  | 352 | struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); | 
|  | 353 |  | 
|  | 354 | sch = dev_queue->qdisc_sleeping; | 
|  | 355 | sch->qstats.qlen = sch->q.qlen; | 
|  | 356 | if (gnet_stats_copy_basic(d, &sch->bstats) < 0 || | 
|  | 357 | gnet_stats_copy_queue(d, &sch->qstats) < 0) | 
|  | 358 | return -1; | 
|  | 359 | } | 
|  | 360 | return 0; | 
|  | 361 | } | 
|  | 362 |  | 
|  | 363 | static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) | 
|  | 364 | { | 
|  | 365 | struct net_device *dev = qdisc_dev(sch); | 
|  | 366 | unsigned long ntx; | 
|  | 367 |  | 
|  | 368 | if (arg->stop) | 
|  | 369 | return; | 
|  | 370 |  | 
|  | 371 | /* Walk hierarchy with a virtual class per tc */ | 
|  | 372 | arg->count = arg->skip; | 
|  | 373 | for (ntx = arg->skip; | 
|  | 374 | ntx < dev->num_tx_queues + netdev_get_num_tc(dev); | 
|  | 375 | ntx++) { | 
|  | 376 | if (arg->fn(sch, ntx + 1, arg) < 0) { | 
|  | 377 | arg->stop = 1; | 
|  | 378 | break; | 
|  | 379 | } | 
|  | 380 | arg->count++; | 
|  | 381 | } | 
|  | 382 | } | 
|  | 383 |  | 
|  | 384 | static const struct Qdisc_class_ops mqprio_class_ops = { | 
|  | 385 | .graft		= mqprio_graft, | 
|  | 386 | .leaf		= mqprio_leaf, | 
|  | 387 | .get		= mqprio_get, | 
|  | 388 | .put		= mqprio_put, | 
|  | 389 | .walk		= mqprio_walk, | 
|  | 390 | .dump		= mqprio_dump_class, | 
|  | 391 | .dump_stats	= mqprio_dump_class_stats, | 
|  | 392 | }; | 
|  | 393 |  | 
| stephen hemminger | ea18fd9 | 2011-02-23 09:06:51 +0000 | [diff] [blame] | 394 | static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { | 
| John Fastabend | b8970f0 | 2011-01-17 08:06:09 +0000 | [diff] [blame] | 395 | .cl_ops		= &mqprio_class_ops, | 
|  | 396 | .id		= "mqprio", | 
|  | 397 | .priv_size	= sizeof(struct mqprio_sched), | 
|  | 398 | .init		= mqprio_init, | 
|  | 399 | .destroy	= mqprio_destroy, | 
|  | 400 | .attach		= mqprio_attach, | 
|  | 401 | .dump		= mqprio_dump, | 
|  | 402 | .owner		= THIS_MODULE, | 
|  | 403 | }; | 
|  | 404 |  | 
|  | 405 | static int __init mqprio_module_init(void) | 
|  | 406 | { | 
|  | 407 | return register_qdisc(&mqprio_qdisc_ops); | 
|  | 408 | } | 
|  | 409 |  | 
|  | 410 | static void __exit mqprio_module_exit(void) | 
|  | 411 | { | 
|  | 412 | unregister_qdisc(&mqprio_qdisc_ops); | 
|  | 413 | } | 
|  | 414 |  | 
|  | 415 | module_init(mqprio_module_init); | 
|  | 416 | module_exit(mqprio_module_exit); | 
|  | 417 |  | 
|  | 418 | MODULE_LICENSE("GPL"); |