/*
 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
 * driver for Linux.
 *
 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ipv6.h>
#include <net/tcp.h>
#include <linux/dma-mapping.h>

#include "t4vf_common.h"
#include "t4vf_defs.h"

#include "../cxgb4/t4_regs.h"
#include "../cxgb4/t4fw_api.h"
#include "../cxgb4/t4_msg.h"

/*
 * Decoded Adapter Parameters.
 */
static u32 FL_PG_ORDER;		/* large page allocation size */
static u32 STAT_LEN;		/* length of status page at ring end */
static u32 PKTSHIFT;		/* padding between CPL and packet data */
static u32 FL_ALIGN;		/* response queue message alignment */

/*
 * Constants ...
 */
enum {
	/*
	 * Egress Queue sizes, producer and consumer indices are all in units
	 * of Egress Queue Units, each EQ_UNIT bytes in size.  Note that as
	 * far as the hardware is concerned, the free list is an Egress Queue
	 * (the host produces free buffers which the hardware consumes) and
	 * free list entries are 64-bit PCI DMA addresses.
	 */
	EQ_UNIT = SGE_EQ_IDXSIZE,
	FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
	TXD_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),

	/*
	 * Max number of TX descriptors we clean up at a time.  Should be
	 * modest as freeing skbs isn't cheap and it happens while holding
	 * locks.  As long as we free packets faster than they arrive, we'll
	 * eventually catch up and keep the amortized cost reasonable.
	 */
	MAX_TX_RECLAIM = 16,

	/*
	 * Max number of Rx buffers we replenish at a time.  Again keep this
	 * modest, allocating buffers isn't cheap either.
	 */
	MAX_RX_REFILL = 16,

	/*
	 * Period of the Rx queue check timer.  This timer is infrequent as it
	 * has something to do only when the system experiences severe memory
	 * shortage.
	 */
	RX_QCHECK_PERIOD = (HZ / 2),

	/*
	 * Period of the TX queue check timer and the maximum number of TX
	 * descriptors to be reclaimed by the TX timer.
	 */
	TX_QCHECK_PERIOD = (HZ / 2),
	MAX_TIMER_TX_RECLAIM = 100,

	/*
	 * An FL with <= FL_STARVE_THRES buffers is starving and a periodic
	 * timer will attempt to refill it.
	 */
	FL_STARVE_THRES = 4,

	/*
	 * Suspend an Ethernet TX queue with fewer available descriptors than
	 * this.  We always want to have room for a maximum sized packet:
	 * inline immediate data + MAX_SKB_FRAGS.  This is the same as
	 * calc_tx_flits() for a TSO packet with nr_frags == MAX_SKB_FRAGS
	 * (see that function and its helpers for a description of the
	 * calculation; a worked example follows this enum).
	 */
	ETHTXQ_MAX_FRAGS = MAX_SKB_FRAGS + 1,
	ETHTXQ_MAX_SGL_LEN = ((3 * (ETHTXQ_MAX_FRAGS-1))/2 +
			      ((ETHTXQ_MAX_FRAGS-1) & 1) +
			      2),
	ETHTXQ_MAX_HDR = (sizeof(struct fw_eth_tx_pkt_vm_wr) +
			  sizeof(struct cpl_tx_pkt_lso_core) +
			  sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64),
	ETHTXQ_MAX_FLITS = ETHTXQ_MAX_SGL_LEN + ETHTXQ_MAX_HDR,

	ETHTXQ_STOP_THRES = 1 + DIV_ROUND_UP(ETHTXQ_MAX_FLITS, TXD_PER_EQ_UNIT),

	/*
	 * Max TX descriptor space we allow for an Ethernet packet to be
	 * inlined into a WR.  This is limited by the maximum value which
	 * we can specify for immediate data in the firmware Ethernet TX
	 * Work Request.
	 */
	MAX_IMM_TX_PKT_LEN = FW_WR_IMMDLEN_MASK,

	/*
	 * Max size of a WR sent through a control TX queue.
	 */
	MAX_CTRL_WR_LEN = 256,

	/*
	 * Maximum amount of data which we'll ever need to inline into a
	 * TX ring: max(MAX_IMM_TX_PKT_LEN, MAX_CTRL_WR_LEN).
	 */
	MAX_IMM_TX_LEN = (MAX_IMM_TX_PKT_LEN > MAX_CTRL_WR_LEN
			  ? MAX_IMM_TX_PKT_LEN
			  : MAX_CTRL_WR_LEN),

	/*
	 * For incoming packets less than RX_COPY_THRES, we copy the data into
	 * an skb rather than referencing the data.  We allocate enough
	 * in-line room in skbs to accommodate pulling in RX_PULL_LEN bytes
	 * of the data (header).
	 */
	RX_COPY_THRES = 256,
	RX_PULL_LEN = 128,
};
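
/*
 * Worked example for the constants above (illustrative only, not used by
 * the code): assuming the usual 64-byte Egress Queue Unit and 4KB pages
 * with MAX_SKB_FRAGS == 18, FL_PER_EQ_UNIT == TXD_PER_EQ_UNIT == 64/8 == 8,
 * ETHTXQ_MAX_FRAGS == 19 and ETHTXQ_MAX_SGL_LEN == (3*18)/2 + (18 & 1) + 2
 * == 29 flits.  ETHTXQ_MAX_HDR and hence ETHTXQ_STOP_THRES additionally
 * depend on the sizes of the firmware and CPL structures, so the exact
 * stop threshold varies with the kernel configuration.
 */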

/*
 * Can't define this in the above enum because PKTSHIFT isn't a constant in
 * the VF Driver ...
 */
#define RX_PKT_PULL_LEN (RX_PULL_LEN + PKTSHIFT)

/*
 * Software state per TX descriptor.
 */
struct tx_sw_desc {
	struct sk_buff *skb;		/* socket buffer of TX data source */
	struct ulptx_sgl *sgl;		/* scatter/gather list in TX Queue */
};

/*
 * Software state per RX Free List descriptor.  We keep track of the allocated
 * FL page, its size, and its PCI DMA address (if the page is mapped).  The FL
 * page size and its PCI DMA mapped state are stored in the low bits of the
 * PCI DMA address as per below.
 */
struct rx_sw_desc {
	struct page *page;		/* Free List page buffer */
	dma_addr_t dma_addr;		/* PCI DMA address (if mapped) */
					/*   and flags (see below) */
};

/*
 * The low bits of rx_sw_desc.dma_addr have special meaning.  Note that the
 * SGE also uses the low 4 bits to determine the size of the buffer.  It uses
 * those bits to index into the SGE_FL_BUFFER_SIZE[index] register array.
 * Since we only use SGE_FL_BUFFER_SIZE0 and SGE_FL_BUFFER_SIZE1, these low 4
 * bits can only contain a 0 or a 1 to indicate which size buffer we're giving
 * to the SGE.  Thus, our software state of "is the buffer mapped for DMA" is
 * maintained in an inverse sense so the hardware never sees that bit high.
 */
enum {
	RX_LARGE_BUF    = 1 << 0,	/* buffer is SGE_FL_BUFFER_SIZE[1] */
	RX_UNMAPPED_BUF = 1 << 1,	/* buffer is not mapped */
};
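
/*
 * For example (illustrative addresses only): a large page mapped at bus
 * address 0x12340000 is handed to the SGE as 0x12340000 | RX_LARGE_BUF ==
 * 0x12340001, so the hardware sees Free List buffer size index 1.
 * get_buf_addr() below strips these low-order flag bits back off before
 * the address is used for unmapping.
 */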

/**
 *	get_buf_addr - return DMA buffer address of software descriptor
 *	@sdesc: pointer to the software buffer descriptor
 *
 *	Return the DMA buffer address of a software descriptor (stripping out
 *	our low-order flag bits).
 */
static inline dma_addr_t get_buf_addr(const struct rx_sw_desc *sdesc)
{
	return sdesc->dma_addr & ~(dma_addr_t)(RX_LARGE_BUF | RX_UNMAPPED_BUF);
}

/**
 *	is_buf_mapped - is buffer mapped for DMA?
 *	@sdesc: pointer to the software buffer descriptor
 *
 *	Determine whether the buffer associated with a software descriptor is
 *	mapped for DMA or not.
 */
static inline bool is_buf_mapped(const struct rx_sw_desc *sdesc)
{
	return !(sdesc->dma_addr & RX_UNMAPPED_BUF);
}

/**
 *	need_skb_unmap - does the platform need unmapping of sk_buffs?
 *
 *	Returns true if the platform needs sk_buff unmapping.  Since this is
 *	a compile-time constant, the compiler can optimize away the unmapping
 *	code when it isn't needed.
 */
static inline int need_skb_unmap(void)
{
#ifdef CONFIG_NEED_DMA_MAP_STATE
	return 1;
#else
	return 0;
#endif
}

/**
 *	txq_avail - return the number of available slots in a TX queue
 *	@tq: the TX queue
 *
 *	Returns the number of available descriptors in a TX queue.
 */
static inline unsigned int txq_avail(const struct sge_txq *tq)
{
	return tq->size - 1 - tq->in_use;
}
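
/*
 * Note: one descriptor is held in reserve above so that a completely full
 * ring can be distinguished from an empty one; if every descriptor were in
 * use, the Producer and Consumer Indices would be equal in both cases.
 */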

/**
 *	fl_cap - return the capacity of a Free List
 *	@fl: the Free List
 *
 *	Returns the capacity of a Free List.  The capacity is less than the
 *	size because an Egress Queue Index Unit worth of descriptors needs to
 *	be left unpopulated, otherwise the Producer and Consumer indices PIDX
 *	and CIDX will match and the hardware will think the FL is empty.
 */
static inline unsigned int fl_cap(const struct sge_fl *fl)
{
	return fl->size - FL_PER_EQ_UNIT;
}
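
/*
 * For example (illustrative only): with 64-byte Egress Queue Units
 * (FL_PER_EQ_UNIT == 8), a Free List with 1024 entries has a usable
 * capacity of 1024 - 8 == 1016 buffers.
 */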

/**
 *	fl_starving - return whether a Free List is starving.
 *	@fl: the Free List
 *
 *	Tests specified Free List to see whether the number of buffers
 *	available to the hardware has fallen below our "starvation"
 *	threshold.
 */
static inline bool fl_starving(const struct sge_fl *fl)
{
	return fl->avail - fl->pend_cred <= FL_STARVE_THRES;
}

/**
 *	map_skb -  map an skb for DMA to the device
 *	@dev: the egress net device
 *	@skb: the packet to map
 *	@addr: a pointer to the base of the DMA mapping array
 *
 *	Map an skb for DMA to the device and return an array of DMA addresses.
 */
static int map_skb(struct device *dev, const struct sk_buff *skb,
		   dma_addr_t *addr)
{
	const skb_frag_t *fp, *end;
	const struct skb_shared_info *si;

	*addr = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
	if (dma_mapping_error(dev, *addr))
		goto out_err;

	si = skb_shinfo(skb);
	end = &si->frags[si->nr_frags];
	for (fp = si->frags; fp < end; fp++) {
		*++addr = dma_map_page(dev, fp->page, fp->page_offset, fp->size,
				       DMA_TO_DEVICE);
		if (dma_mapping_error(dev, *addr))
			goto unwind;
	}
	return 0;

unwind:
	while (fp-- > si->frags)
		dma_unmap_page(dev, *--addr, fp->size, DMA_TO_DEVICE);
	dma_unmap_single(dev, addr[-1], skb_headlen(skb), DMA_TO_DEVICE);

out_err:
	return -ENOMEM;
}

static void unmap_sgl(struct device *dev, const struct sk_buff *skb,
		      const struct ulptx_sgl *sgl, const struct sge_txq *tq)
{
	const struct ulptx_sge_pair *p;
	unsigned int nfrags = skb_shinfo(skb)->nr_frags;

	if (likely(skb_headlen(skb)))
		dma_unmap_single(dev, be64_to_cpu(sgl->addr0),
				 be32_to_cpu(sgl->len0), DMA_TO_DEVICE);
	else {
		dma_unmap_page(dev, be64_to_cpu(sgl->addr0),
			       be32_to_cpu(sgl->len0), DMA_TO_DEVICE);
		nfrags--;
	}

	/*
	 * the complexity below is because of the possibility of a wrap-around
	 * in the middle of an SGL
	 */
	for (p = sgl->sge; nfrags >= 2; nfrags -= 2) {
		if (likely((u8 *)(p + 1) <= (u8 *)tq->stat)) {
unmap:
			dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
				       be32_to_cpu(p->len[0]), DMA_TO_DEVICE);
			dma_unmap_page(dev, be64_to_cpu(p->addr[1]),
				       be32_to_cpu(p->len[1]), DMA_TO_DEVICE);
			p++;
		} else if ((u8 *)p == (u8 *)tq->stat) {
			p = (const struct ulptx_sge_pair *)tq->desc;
			goto unmap;
		} else if ((u8 *)p + 8 == (u8 *)tq->stat) {
			const __be64 *addr = (const __be64 *)tq->desc;

			dma_unmap_page(dev, be64_to_cpu(addr[0]),
				       be32_to_cpu(p->len[0]), DMA_TO_DEVICE);
			dma_unmap_page(dev, be64_to_cpu(addr[1]),
				       be32_to_cpu(p->len[1]), DMA_TO_DEVICE);
			p = (const struct ulptx_sge_pair *)&addr[2];
		} else {
			const __be64 *addr = (const __be64 *)tq->desc;

			dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
				       be32_to_cpu(p->len[0]), DMA_TO_DEVICE);
			dma_unmap_page(dev, be64_to_cpu(addr[0]),
				       be32_to_cpu(p->len[1]), DMA_TO_DEVICE);
			p = (const struct ulptx_sge_pair *)&addr[1];
		}
	}
	if (nfrags) {
		__be64 addr;

		if ((u8 *)p == (u8 *)tq->stat)
			p = (const struct ulptx_sge_pair *)tq->desc;
		addr = ((u8 *)p + 16 <= (u8 *)tq->stat
			? p->addr[0]
			: *(const __be64 *)tq->desc);
		dma_unmap_page(dev, be64_to_cpu(addr), be32_to_cpu(p->len[0]),
			       DMA_TO_DEVICE);
	}
}

/**
 *	free_tx_desc - reclaims TX descriptors and their buffers
 *	@adapter: the adapter
 *	@tq: the TX queue to reclaim descriptors from
 *	@n: the number of descriptors to reclaim
 *	@unmap: whether the buffers should be unmapped for DMA
 *
 *	Reclaims TX descriptors from an SGE TX queue and frees the associated
 *	TX buffers.  Called with the TX queue lock held.
 */
static void free_tx_desc(struct adapter *adapter, struct sge_txq *tq,
			 unsigned int n, bool unmap)
{
	struct tx_sw_desc *sdesc;
	unsigned int cidx = tq->cidx;
	struct device *dev = adapter->pdev_dev;

	const int need_unmap = need_skb_unmap() && unmap;

	sdesc = &tq->sdesc[cidx];
	while (n--) {
		/*
		 * If we kept a reference to the original TX skb, we need to
		 * unmap it from PCI DMA space (if required) and free it.
		 */
		if (sdesc->skb) {
			if (need_unmap)
				unmap_sgl(dev, sdesc->skb, sdesc->sgl, tq);
			kfree_skb(sdesc->skb);
			sdesc->skb = NULL;
		}

		sdesc++;
		if (++cidx == tq->size) {
			cidx = 0;
			sdesc = tq->sdesc;
		}
	}
	tq->cidx = cidx;
}

/*
 * Return the number of reclaimable descriptors in a TX queue.
 */
static inline int reclaimable(const struct sge_txq *tq)
{
	int hw_cidx = be16_to_cpu(tq->stat->cidx);
	int reclaimable = hw_cidx - tq->cidx;
	if (reclaimable < 0)
		reclaimable += tq->size;
	return reclaimable;
}

/**
 *	reclaim_completed_tx - reclaims completed TX descriptors
 *	@adapter: the adapter
 *	@tq: the TX queue to reclaim completed descriptors from
 *	@unmap: whether the buffers should be unmapped for DMA
 *
 *	Reclaims TX descriptors that the SGE has indicated it has processed,
 *	and frees the associated buffers if possible.  Called with the TX
 *	queue locked.
 */
static inline void reclaim_completed_tx(struct adapter *adapter,
					struct sge_txq *tq,
					bool unmap)
{
	int avail = reclaimable(tq);

	if (avail) {
		/*
		 * Limit the amount of clean up work we do at a time to keep
		 * the TX lock hold time O(1).
		 */
		if (avail > MAX_TX_RECLAIM)
			avail = MAX_TX_RECLAIM;

		free_tx_desc(adapter, tq, avail, unmap);
		tq->in_use -= avail;
	}
}

/**
 *	get_buf_size - return the size of an RX Free List buffer.
 *	@sdesc: pointer to the software buffer descriptor
 */
static inline int get_buf_size(const struct rx_sw_desc *sdesc)
{
	return FL_PG_ORDER > 0 && (sdesc->dma_addr & RX_LARGE_BUF)
		? (PAGE_SIZE << FL_PG_ORDER)
		: PAGE_SIZE;
}
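
/*
 * For example (illustrative only): with 4KB pages and FL_PG_ORDER == 1,
 * buffers flagged RX_LARGE_BUF are 8KB while unflagged buffers are 4KB.
 */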

/**
 *	free_rx_bufs - free RX buffers on an SGE Free List
 *	@adapter: the adapter
 *	@fl: the SGE Free List to free buffers from
 *	@n: how many buffers to free
 *
 *	Release the next @n buffers on an SGE Free List RX queue.   The
 *	buffers must be made inaccessible to hardware before calling this
 *	function.
 */
static void free_rx_bufs(struct adapter *adapter, struct sge_fl *fl, int n)
{
	while (n--) {
		struct rx_sw_desc *sdesc = &fl->sdesc[fl->cidx];

		if (is_buf_mapped(sdesc))
			dma_unmap_page(adapter->pdev_dev, get_buf_addr(sdesc),
				       get_buf_size(sdesc), PCI_DMA_FROMDEVICE);
		put_page(sdesc->page);
		sdesc->page = NULL;
		if (++fl->cidx == fl->size)
			fl->cidx = 0;
		fl->avail--;
	}
}

/**
 *	unmap_rx_buf - unmap the current RX buffer on an SGE Free List
 *	@adapter: the adapter
 *	@fl: the SGE Free List
 *
 *	Unmap the current buffer on an SGE Free List RX queue.   The
 *	buffer must be made inaccessible to HW before calling this function.
 *
 *	This is similar to @free_rx_bufs above but does not free the buffer.
 *	Do note that the FL still loses any further access to the buffer.
 *	This is used predominantly to "transfer ownership" of an FL buffer
 *	to another entity (typically an skb's fragment list).
 */
static void unmap_rx_buf(struct adapter *adapter, struct sge_fl *fl)
{
	struct rx_sw_desc *sdesc = &fl->sdesc[fl->cidx];

	if (is_buf_mapped(sdesc))
		dma_unmap_page(adapter->pdev_dev, get_buf_addr(sdesc),
			       get_buf_size(sdesc), PCI_DMA_FROMDEVICE);
	sdesc->page = NULL;
	if (++fl->cidx == fl->size)
		fl->cidx = 0;
	fl->avail--;
}

/**
 *	ring_fl_db - ring doorbell on free list
 *	@adapter: the adapter
 *	@fl: the Free List whose doorbell should be rung ...
 *
 *	Tell the Scatter Gather Engine that there are new free list entries
 *	available.
 */
static inline void ring_fl_db(struct adapter *adapter, struct sge_fl *fl)
{
	/*
	 * The SGE keeps track of its Producer and Consumer Indices in terms
	 * of Egress Queue Units so we can only tell it about whole multiples
	 * of Free List Entries per Egress Queue Unit ...
	 */
	if (fl->pend_cred >= FL_PER_EQ_UNIT) {
		wmb();
		t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
			     DBPRIO |
			     QID(fl->cntxt_id) |
			     PIDX(fl->pend_cred / FL_PER_EQ_UNIT));
		fl->pend_cred %= FL_PER_EQ_UNIT;
	}
}
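
/*
 * Worked example (illustrative only): with FL_PER_EQ_UNIT == 8, a pending
 * credit of 37 newly filled entries results in a doorbell write of
 * PIDX(37 / 8) == PIDX(4), i.e. four Egress Queue Units, and the remaining
 * 37 % 8 == 5 entries stay in fl->pend_cred until enough accumulate to
 * make up another full unit.
 */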

/**
 *	set_rx_sw_desc - initialize software RX buffer descriptor
 *	@sdesc: pointer to the software RX buffer descriptor
 *	@page: pointer to the page data structure backing the RX buffer
 *	@dma_addr: PCI DMA address (possibly with low-bit flags)
 */
static inline void set_rx_sw_desc(struct rx_sw_desc *sdesc, struct page *page,
				  dma_addr_t dma_addr)
{
	sdesc->page = page;
	sdesc->dma_addr = dma_addr;
}

/*
 * Support for poisoning RX buffers ...
 */
#define POISON_BUF_VAL -1

static inline void poison_buf(struct page *page, size_t sz)
{
#if POISON_BUF_VAL >= 0
	memset(page_address(page), POISON_BUF_VAL, sz);
#endif
}
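
/*
 * With POISON_BUF_VAL defined as -1 the "#if POISON_BUF_VAL >= 0" above
 * compiles to nothing, so poisoning is disabled.  Defining it to a small
 * non-negative value (e.g. 0xa5) would memset() every freshly allocated
 * Free List buffer, which can help catch use of stale RX data.
 */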

/**
 *	refill_fl - refill an SGE RX buffer ring
 *	@adapter: the adapter
 *	@fl: the Free List ring to refill
 *	@n: the number of new buffers to allocate
 *	@gfp: the gfp flags for the allocations
 *
 *	(Re)populate an SGE free-buffer queue with up to @n new packet buffers,
 *	allocated with the supplied gfp flags.  The caller must ensure that
 *	@n does not exceed the queue's capacity -- i.e. (cidx == pidx) _IN
 *	EGRESS QUEUE UNITS_ indicates an empty Free List!  Returns the number
 *	of buffers allocated.  If afterwards the queue is found critically low,
 *	mark it as starving in the bitmap of starving FLs.
 */
static unsigned int refill_fl(struct adapter *adapter, struct sge_fl *fl,
			      int n, gfp_t gfp)
{
	struct page *page;
	dma_addr_t dma_addr;
	unsigned int cred = fl->avail;
	__be64 *d = &fl->desc[fl->pidx];
	struct rx_sw_desc *sdesc = &fl->sdesc[fl->pidx];

	/*
	 * Sanity: ensure that the result of adding n Free List buffers
	 * won't result in wrapping the SGE's Producer Index around to
	 * its Consumer Index thereby indicating an empty Free List ...
	 */
	BUG_ON(fl->avail + n > fl->size - FL_PER_EQ_UNIT);

	/*
	 * If we support large pages, prefer large buffers and fail over to
	 * small pages if we can't allocate large pages to satisfy the refill.
	 * If we don't support large pages, drop directly into the small page
	 * allocation code.
	 */
	if (FL_PG_ORDER == 0)
		goto alloc_small_pages;

	while (n) {
		page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
				   FL_PG_ORDER);
		if (unlikely(!page)) {
			/*
			 * We've failed in our attempt to allocate a "large
			 * page".  Fail over to the "small page" allocation
			 * below.
			 */
			fl->large_alloc_failed++;
			break;
		}
		poison_buf(page, PAGE_SIZE << FL_PG_ORDER);

		dma_addr = dma_map_page(adapter->pdev_dev, page, 0,
					PAGE_SIZE << FL_PG_ORDER,
					PCI_DMA_FROMDEVICE);
		if (unlikely(dma_mapping_error(adapter->pdev_dev, dma_addr))) {
			/*
			 * We've run out of DMA mapping space.  Free up the
			 * buffer and return with what we've managed to put
			 * into the free list.  We don't want to fail over to
			 * the small page allocation below in this case
			 * because DMA mapping resources are typically
			 * critical resources once they become scarce.
			 */
			__free_pages(page, FL_PG_ORDER);
			goto out;
		}
		dma_addr |= RX_LARGE_BUF;
		*d++ = cpu_to_be64(dma_addr);

		set_rx_sw_desc(sdesc, page, dma_addr);
		sdesc++;

		fl->avail++;
		if (++fl->pidx == fl->size) {
			fl->pidx = 0;
			sdesc = fl->sdesc;
			d = fl->desc;
		}
		n--;
	}

alloc_small_pages:
	while (n--) {
		page = __netdev_alloc_page(adapter->port[0],
					   gfp | __GFP_NOWARN);
		if (unlikely(!page)) {
			fl->alloc_failed++;
			break;
		}
		poison_buf(page, PAGE_SIZE);

		dma_addr = dma_map_page(adapter->pdev_dev, page, 0, PAGE_SIZE,
					PCI_DMA_FROMDEVICE);
		if (unlikely(dma_mapping_error(adapter->pdev_dev, dma_addr))) {
			netdev_free_page(adapter->port[0], page);
			break;
		}
		*d++ = cpu_to_be64(dma_addr);

		set_rx_sw_desc(sdesc, page, dma_addr);
		sdesc++;

		fl->avail++;
		if (++fl->pidx == fl->size) {
			fl->pidx = 0;
			sdesc = fl->sdesc;
			d = fl->desc;
		}
	}

out:
	/*
	 * Update our accounting state to incorporate the new Free List
	 * buffers, tell the hardware about them and return the number of
	 * buffers which we were able to allocate.
	 */
	cred = fl->avail - cred;
	fl->pend_cred += cred;
	ring_fl_db(adapter, fl);

	if (unlikely(fl_starving(fl))) {
		smp_wmb();
		set_bit(fl->cntxt_id, adapter->sge.starving_fl);
	}

	return cred;
}

/*
 * Refill a Free List to its capacity or the Maximum Refill Increment,
 * whichever is smaller ...
 */
static inline void __refill_fl(struct adapter *adapter, struct sge_fl *fl)
{
	refill_fl(adapter, fl,
		  min((unsigned int)MAX_RX_REFILL, fl_cap(fl) - fl->avail),
		  GFP_ATOMIC);
}

/**
 *	alloc_ring - allocate resources for an SGE descriptor ring
 *	@dev: the PCI device's core device
 *	@nelem: the number of descriptors
 *	@hwsize: the size of each hardware descriptor
 *	@swsize: the size of each software descriptor
 *	@busaddrp: the physical PCI bus address of the allocated ring
 *	@swringp: return address pointer for software ring
 *	@stat_size: extra space in hardware ring for status information
 *
 *	Allocates resources for an SGE descriptor ring, such as TX queues,
 *	free buffer lists, response queues, etc.  Each SGE ring requires
 *	space for its hardware descriptors plus, optionally, space for software
 *	state associated with each hardware entry (the metadata).  The function
 *	returns three values: the virtual address for the hardware ring (the
 *	return value of the function), the PCI bus address of the hardware
 *	ring (in *busaddrp), and the address of the software ring (in swringp).
 *	Both the hardware and software rings are returned zeroed out.
 */
static void *alloc_ring(struct device *dev, size_t nelem, size_t hwsize,
			size_t swsize, dma_addr_t *busaddrp, void *swringp,
			size_t stat_size)
{
	/*
	 * Allocate the hardware ring and the PCI DMA bus address space for
	 * it.
	 */
	size_t hwlen = nelem * hwsize + stat_size;
	void *hwring = dma_alloc_coherent(dev, hwlen, busaddrp, GFP_KERNEL);

	if (!hwring)
		return NULL;

	/*
	 * If the caller wants a software ring, allocate it and return a
	 * pointer to it in *swringp.
	 */
	BUG_ON((swsize != 0) != (swringp != NULL));
	if (swsize) {
		void *swring = kcalloc(nelem, swsize, GFP_KERNEL);

		if (!swring) {
			dma_free_coherent(dev, hwlen, hwring, *busaddrp);
			return NULL;
		}
		*(void **)swringp = swring;
	}

	/*
	 * Zero out the hardware ring and return its address as our function
	 * value.
	 */
	memset(hwring, 0, hwlen);
	return hwring;
}
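
/*
 * Illustrative use of alloc_ring() (a sketch only; the variable names are
 * hypothetical, not taken from this driver): a 1024-entry Free List with
 * per-buffer software state and a trailing status page might be set up
 * roughly as
 *
 *	fl_desc = alloc_ring(adapter->pdev_dev, 1024, sizeof(__be64),
 *			     sizeof(struct rx_sw_desc), &fl_busaddr,
 *			     &fl_sdesc, STAT_LEN);
 *
 * with 0/NULL passed for @swsize/@swringp when no per-entry metadata is
 * needed.
 */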

/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Calculates the number of flits (8-byte units) needed for a Direct
 *	Scatter/Gather List that can hold the given number of entries.
 */
static inline unsigned int sgl_len(unsigned int n)
{
	/*
	 * A Direct Scatter Gather List uses 32-bit lengths and 64-bit PCI DMA
	 * addresses.  The DSGL Work Request starts off with a 32-bit DSGL
	 * ULPTX header, then Length0, then Address0, then, for 1 <= i <= N,
	 * repeated sequences of { Length[i], Length[i+1], Address[i],
	 * Address[i+1] } (this ensures that all addresses are on 64-bit
	 * boundaries).  If N is even, then Length[N+1] should be set to 0 and
	 * Address[N+1] is omitted.
	 *
	 * The following calculation incorporates all of the above.  It's
	 * somewhat hard to follow but, briefly: the "+2" accounts for the
	 * first two flits which include the DSGL header, Length0 and
	 * Address0; the "(3*(n-1))/2" covers the main body of list entries
	 * (3 flits for every pair of the remaining N-1 entries); and finally
	 * the "+((n-1)&1)" adds the one remaining flit needed if (n-1) is
	 * odd ...
	 */
	n--;
	return (3 * n) / 2 + (n & 1) + 2;
}
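
/*
 * Worked example (illustrative only): for n == 5 SGL entries the function
 * computes, after the n--, (3*4)/2 + (4 & 1) + 2 == 6 + 0 + 2 == 8 flits:
 * two flits for the header/Length0/Address0 plus three flits for each of
 * the two remaining {Length, Length, Address, Address} pairs.
 */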

/**
 *	flits_to_desc - returns the num of TX descriptors for the given flits
 *	@flits: the number of flits
 *
 *	Returns the number of TX descriptors needed for the supplied number
 *	of flits.
 */
static inline unsigned int flits_to_desc(unsigned int flits)
{
	BUG_ON(flits > SGE_MAX_WR_LEN / sizeof(__be64));
	return DIV_ROUND_UP(flits, TXD_PER_EQ_UNIT);
}
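
/*
 * For example (illustrative only): with 64-byte Egress Queue Units
 * (TXD_PER_EQ_UNIT == 8), the 8 flits from the sgl_len() example above fit
 * in a single TX descriptor, while 9 to 16 flits would need two.
 */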

/**
 *	is_eth_imm - can an Ethernet packet be sent as immediate data?
 *	@skb: the packet
 *
 *	Returns whether an Ethernet packet is small enough to fit completely as
 *	immediate data.
 */
static inline int is_eth_imm(const struct sk_buff *skb)
{
	/*
	 * The VF Driver uses the FW_ETH_TX_PKT_VM_WR firmware Work Request
	 * which does not accommodate immediate data.  We could dike out all
	 * of the support code for immediate data but that would tie our hands
	 * too much if we ever want to enhance the firmware.  It would also
	 * create more differences between the PF and VF Drivers.
	 */
	return false;
}

/**
 *	calc_tx_flits - calculate the number of flits for a packet TX WR
 *	@skb: the packet
 *
 *	Returns the number of flits needed for a TX Work Request for the
 *	given Ethernet packet, including the needed WR and CPL headers.
 */
static inline unsigned int calc_tx_flits(const struct sk_buff *skb)
{
	unsigned int flits;

	/*
	 * If the skb is small enough, we can pump it out as a work request
	 * with only immediate data.  In that case we just have to have the
	 * TX Packet header plus the skb data in the Work Request.
	 */
	if (is_eth_imm(skb))
		return DIV_ROUND_UP(skb->len + sizeof(struct cpl_tx_pkt),
				    sizeof(__be64));

	/*
	 * Otherwise, we're going to have to construct a Scatter/Gather List
	 * of the skb body and fragments.  We also include the flits necessary
	 * for the TX Packet Work Request and CPL.  We always have a firmware
	 * Write Header (incorporated as part of the cpl_tx_pkt_lso and
	 * cpl_tx_pkt structures), followed by either a TX Packet Write CPL
	 * message or, if we're doing a Large Send Offload, an LSO CPL message
	 * with an embedded TX Packet Write CPL message.
	 */
	flits = sgl_len(skb_shinfo(skb)->nr_frags + 1);
	if (skb_shinfo(skb)->gso_size)
		flits += (sizeof(struct fw_eth_tx_pkt_vm_wr) +
			  sizeof(struct cpl_tx_pkt_lso_core) +
			  sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
	else
		flits += (sizeof(struct fw_eth_tx_pkt_vm_wr) +
			  sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
	return flits;
}
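
/*
 * Worked example (illustrative only): a non-TSO packet with linear data
 * plus two page fragments needs sgl_len(2 + 1) == 5 flits for its
 * Scatter/Gather List, plus however many flits the firmware Work Request
 * and TX Packet CPL headers occupy as computed above.
 */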

/**
 *	write_sgl - populate a Scatter/Gather List for a packet
 *	@skb: the packet
 *	@tq: the TX queue we are writing into
 *	@sgl: starting location for writing the SGL
 *	@end: points right after the end of the SGL
 *	@start: start offset into skb main-body data to include in the SGL
 *	@addr: the list of DMA bus addresses for the SGL elements
 *
 *	Generates a Scatter/Gather List for the buffers that make up a packet.
 *	The caller must provide adequate space for the SGL that will be written.
 *	The SGL includes all of the packet's page fragments and the data in its
 *	main body except for the first @start bytes.  @sgl must be 16-byte
 *	aligned and within a TX descriptor with available space.  @end points
 *	right after the end of the SGL but does not account for any potential
 *	wrap around, i.e., @end > @tq->stat.
 */
static void write_sgl(const struct sk_buff *skb, struct sge_txq *tq,
		      struct ulptx_sgl *sgl, u64 *end, unsigned int start,
		      const dma_addr_t *addr)
{
	unsigned int i, len;
	struct ulptx_sge_pair *to;
	const struct skb_shared_info *si = skb_shinfo(skb);
	unsigned int nfrags = si->nr_frags;
	struct ulptx_sge_pair buf[MAX_SKB_FRAGS / 2 + 1];

	len = skb_headlen(skb) - start;
	if (likely(len)) {
		sgl->len0 = htonl(len);
		sgl->addr0 = cpu_to_be64(addr[0] + start);
		nfrags++;
	} else {
		sgl->len0 = htonl(si->frags[0].size);
		sgl->addr0 = cpu_to_be64(addr[1]);
	}

	sgl->cmd_nsge = htonl(ULPTX_CMD(ULP_TX_SC_DSGL) |
			      ULPTX_NSGE(nfrags));
	if (likely(--nfrags == 0))
		return;
	/*
	 * Most of the complexity below deals with the possibility we hit the
	 * end of the queue in the middle of writing the SGL.  For this case
	 * only we create the SGL in a temporary buffer and then copy it.
	 */
	to = (u8 *)end > (u8 *)tq->stat ? buf : sgl->sge;

	for (i = (nfrags != si->nr_frags); nfrags >= 2; nfrags -= 2, to++) {
		to->len[0] = cpu_to_be32(si->frags[i].size);
		to->len[1] = cpu_to_be32(si->frags[++i].size);
		to->addr[0] = cpu_to_be64(addr[i]);
		to->addr[1] = cpu_to_be64(addr[++i]);
	}
	if (nfrags) {
		to->len[0] = cpu_to_be32(si->frags[i].size);
		to->len[1] = cpu_to_be32(0);
		to->addr[0] = cpu_to_be64(addr[i + 1]);
	}
	if (unlikely((u8 *)end > (u8 *)tq->stat)) {
		unsigned int part0 = (u8 *)tq->stat - (u8 *)sgl->sge, part1;

		if (likely(part0))
			memcpy(sgl->sge, buf, part0);
		part1 = (u8 *)end - (u8 *)tq->stat;
		memcpy(tq->desc, (u8 *)buf + part0, part1);
		end = (void *)tq->desc + part1;
	}
	if ((uintptr_t)end & 8)           /* 0-pad to multiple of 16 */
		*(u64 *)end = 0;
}

/**
 *	ring_tx_db - check and potentially ring a TX queue's doorbell
 *	@adapter: the adapter
 *	@tq: the TX queue
 *	@n: number of new descriptors to give to HW
 *
 *	Ring the doorbell for a TX queue.
 */
static inline void ring_tx_db(struct adapter *adapter, struct sge_txq *tq,
			      int n)
{
	/*
	 * Warn if we write doorbells with the wrong priority and write
	 * descriptors before telling HW.
	 */
	WARN_ON((QID(tq->cntxt_id) | PIDX(n)) & DBPRIO);
	wmb();
	t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
		     QID(tq->cntxt_id) | PIDX(n));
}

/**
 *	inline_tx_skb - inline a packet's data into TX descriptors
 *	@skb: the packet
 *	@tq: the TX queue where the packet will be inlined
 *	@pos: starting position in the TX queue to inline the packet
 *
 *	Inline a packet's contents directly into TX descriptors, starting at
 *	the given position within the TX DMA ring.
 *	Most of the complexity of this operation is dealing with wrap arounds
 *	in the middle of the packet we want to inline.
 */
static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *tq,
			  void *pos)
{
	u64 *p;
	int left = (void *)tq->stat - pos;

	if (likely(skb->len <= left)) {
		if (likely(!skb->data_len))
			skb_copy_from_linear_data(skb, pos, skb->len);
		else
			skb_copy_bits(skb, 0, pos, skb->len);
		pos += skb->len;
	} else {
		skb_copy_bits(skb, 0, pos, left);
		skb_copy_bits(skb, left, tq->desc, skb->len - left);
		pos = (void *)tq->desc + (skb->len - left);
	}

	/* 0-pad to multiple of 16 */
	p = PTR_ALIGN(pos, 8);
	if ((uintptr_t)p & 8)
		*p = 0;
}

/*
 * Figure out what HW csum a packet wants and return the appropriate control
 * bits.
 */
static u64 hwcsum(const struct sk_buff *skb)
{
	int csum_type;
	const struct iphdr *iph = ip_hdr(skb);

	if (iph->version == 4) {
		if (iph->protocol == IPPROTO_TCP)
			csum_type = TX_CSUM_TCPIP;
		else if (iph->protocol == IPPROTO_UDP)
			csum_type = TX_CSUM_UDPIP;
		else {
nocsum:
			/*
			 * unknown protocol, disable HW csum
			 * and hope a bad packet is detected
			 */
			return TXPKT_L4CSUM_DIS;
		}
	} else {
		/*
		 * this doesn't work with extension headers
		 */
		const struct ipv6hdr *ip6h = (const struct ipv6hdr *)iph;

		if (ip6h->nexthdr == IPPROTO_TCP)
			csum_type = TX_CSUM_TCPIP6;
		else if (ip6h->nexthdr == IPPROTO_UDP)
			csum_type = TX_CSUM_UDPIP6;
		else
			goto nocsum;
	}

	if (likely(csum_type >= TX_CSUM_TCPIP))
		return TXPKT_CSUM_TYPE(csum_type) |
			TXPKT_IPHDR_LEN(skb_network_header_len(skb)) |
			TXPKT_ETHHDR_LEN(skb_network_offset(skb) - ETH_HLEN);
	else {
		int start = skb_transport_offset(skb);

		return TXPKT_CSUM_TYPE(csum_type) |
			TXPKT_CSUM_START(start) |
			TXPKT_CSUM_LOC(start + skb->csum_offset);
	}
}

/*
 * Stop an Ethernet TX queue and record that state change.
 */
static void txq_stop(struct sge_eth_txq *txq)
{
	netif_tx_stop_queue(txq->txq);
	txq->q.stops++;
}

/*
 * Advance our software state for a TX queue by adding n in use descriptors.
 */
static inline void txq_advance(struct sge_txq *tq, unsigned int n)
{
	tq->in_use += n;
	tq->pidx += n;
	if (tq->pidx >= tq->size)
		tq->pidx -= tq->size;
}

/**
 *	t4vf_eth_xmit - add a packet to an Ethernet TX queue
 *	@skb: the packet
 *	@dev: the egress net device
 *
 *	Add a packet to an SGE Ethernet TX queue.  Runs with softirqs disabled.
 */
int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev)
{
	u64 cntrl, *end;
	int qidx, credits;
	unsigned int flits, ndesc;
	struct adapter *adapter;
	struct sge_eth_txq *txq;
	const struct port_info *pi;
	struct fw_eth_tx_pkt_vm_wr *wr;
	struct cpl_tx_pkt_core *cpl;
	const struct skb_shared_info *ssi;
	dma_addr_t addr[MAX_SKB_FRAGS + 1];
	const size_t fw_hdr_copy_len = (sizeof(wr->ethmacdst) +
					sizeof(wr->ethmacsrc) +
					sizeof(wr->ethtype) +
					sizeof(wr->vlantci));

	/*
	 * The chip minimum packet length is 10 octets but the firmware
	 * command that we are using requires that we copy the Ethernet header
	 * (including the VLAN tag) into the Work Request header, so we reject
	 * anything smaller than that ...
	 */
	if (unlikely(skb->len < fw_hdr_copy_len))
		goto out_free;

	/*
	 * Figure out which TX Queue we're going to use.
	 */
	pi = netdev_priv(dev);
	adapter = pi->adapter;
	qidx = skb_get_queue_mapping(skb);
	BUG_ON(qidx >= pi->nqsets);
	txq = &adapter->sge.ethtxq[pi->first_qset + qidx];

	/*
	 * Take this opportunity to reclaim any TX Descriptors whose DMA
	 * transfers have completed.
	 */
	reclaim_completed_tx(adapter, &txq->q, true);

	/*
	 * Calculate the number of flits and TX Descriptors we're going to
	 * need along with how many TX Descriptors will be left over after
	 * we inject our Work Request.
	 */
	flits = calc_tx_flits(skb);
	ndesc = flits_to_desc(flits);
	credits = txq_avail(&txq->q) - ndesc;

	if (unlikely(credits < 0)) {
		/*
		 * Not enough room for this packet's Work Request.  Stop the
		 * TX Queue and return a "busy" condition.  The queue will get
		 * started later on when the firmware informs us that space
		 * has opened up.
		 */
		txq_stop(txq);
		dev_err(adapter->pdev_dev,
			"%s: TX ring %u full while queue awake!\n",
			dev->name, qidx);
		return NETDEV_TX_BUSY;
	}

	if (!is_eth_imm(skb) &&
	    unlikely(map_skb(adapter->pdev_dev, skb, addr) < 0)) {
		/*
		 * We need to map the skb into PCI DMA space (because it can't
		 * be in-lined directly into the Work Request) and the mapping
		 * operation failed.  Record the error and drop the packet.
		 */
		txq->mapping_err++;
		goto out_free;
	}

	if (unlikely(credits < ETHTXQ_STOP_THRES)) {
		/*
		 * After we're done injecting the Work Request for this
		 * packet, we'll be below our "stop threshold" so stop the TX
		 * Queue now.  The queue will get started later on when the
		 * firmware informs us that space has opened up.
		 */
		txq_stop(txq);
	}

	/*
	 * Start filling in our Work Request.  Note that we do _not_ handle
	 * the WR Header wrapping around the TX Descriptor Ring.  If our
	 * maximum header size ever exceeds one TX Descriptor, we'll need to
	 * do something else here.
	 */
	BUG_ON(DIV_ROUND_UP(ETHTXQ_MAX_HDR, TXD_PER_EQ_UNIT) > 1);
	wr = (void *)&txq->q.desc[txq->q.pidx];
	wr->equiq_to_len16 = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(flits, 2)));
	wr->r3[0] = cpu_to_be64(0);
	wr->r3[1] = cpu_to_be64(0);
	skb_copy_from_linear_data(skb, (void *)wr->ethmacdst, fw_hdr_copy_len);
	end = (u64 *)wr + flits;

	/*
	 * If this is a Large Send Offload packet we'll put in an LSO CPL
	 * message with an encapsulated TX Packet CPL message.  Otherwise we
	 * just use a TX Packet CPL message.
	 */
	ssi = skb_shinfo(skb);
	if (ssi->gso_size) {
		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
		bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
		int l3hdr_len = skb_network_header_len(skb);
		int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;

		wr->op_immdlen =
			cpu_to_be32(FW_WR_OP(FW_ETH_TX_PKT_VM_WR) |
				    FW_WR_IMMDLEN(sizeof(*lso) +
						  sizeof(*cpl)));
		/*
		 * Fill in the LSO CPL message.
		 */
		lso->lso_ctrl =
			cpu_to_be32(LSO_OPCODE(CPL_TX_PKT_LSO) |
				    LSO_FIRST_SLICE |
				    LSO_LAST_SLICE |
				    LSO_IPV6(v6) |
				    LSO_ETHHDR_LEN(eth_xtra_len/4) |
				    LSO_IPHDR_LEN(l3hdr_len/4) |
				    LSO_TCPHDR_LEN(tcp_hdr(skb)->doff));
		lso->ipid_ofst = cpu_to_be16(0);
		lso->mss = cpu_to_be16(ssi->gso_size);
		lso->seqno_offset = cpu_to_be32(0);
		lso->len = cpu_to_be32(skb->len);

		/*
		 * Set up TX Packet CPL pointer, control word and perform
		 * accounting.
		 */
		cpl = (void *)(lso + 1);
		cntrl = (TXPKT_CSUM_TYPE(v6 ? TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) |
			 TXPKT_IPHDR_LEN(l3hdr_len) |
			 TXPKT_ETHHDR_LEN(eth_xtra_len));
		txq->tso++;
		txq->tx_cso += ssi->gso_segs;
	} else {
		int len;

		len = is_eth_imm(skb) ? skb->len + sizeof(*cpl) : sizeof(*cpl);
		wr->op_immdlen =
			cpu_to_be32(FW_WR_OP(FW_ETH_TX_PKT_VM_WR) |
				    FW_WR_IMMDLEN(len));

		/*
		 * Set up TX Packet CPL pointer, control word and perform
		 * accounting.
		 */
		cpl = (void *)(wr + 1);
		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			cntrl = hwcsum(skb) | TXPKT_IPCSUM_DIS;
			txq->tx_cso++;
		} else
			cntrl = TXPKT_L4CSUM_DIS | TXPKT_IPCSUM_DIS;
	}

	/*
	 * If there's a VLAN tag present, add that to the list of things to
	 * do in this Work Request.
	 */
	if (vlan_tx_tag_present(skb)) {
		txq->vlan_ins++;
		cntrl |= TXPKT_VLAN_VLD | TXPKT_VLAN(vlan_tx_tag_get(skb));
	}

	/*
	 * Fill in the TX Packet CPL message header.
	 */
	cpl->ctrl0 = cpu_to_be32(TXPKT_OPCODE(CPL_TX_PKT_XT) |
				 TXPKT_INTF(pi->port_id) |
				 TXPKT_PF(0));
	cpl->pack = cpu_to_be16(0);
	cpl->len = cpu_to_be16(skb->len);
	cpl->ctrl1 = cpu_to_be64(cntrl);

#ifdef T4_TRACE
	T4_TRACE5(adapter->tb[txq->q.cntxt_id & 7],
		  "eth_xmit: ndesc %u, credits %u, pidx %u, len %u, frags %u",
		  ndesc, credits, txq->q.pidx, skb->len, ssi->nr_frags);
#endif

	/*
	 * Fill in the body of the TX Packet CPL message with either in-lined
	 * data or a Scatter/Gather List.
	 */
	if (is_eth_imm(skb)) {
		/*
		 * In-line the packet's data and free the skb since we don't
		 * need it any longer.
		 */
		inline_tx_skb(skb, &txq->q, cpl + 1);
		dev_kfree_skb(skb);
	} else {
		/*
		 * Write the skb's Scatter/Gather list into the TX Packet CPL
		 * message and retain a pointer to the skb so we can free it
		 * later when its DMA completes.  (We store the skb pointer
		 * in the Software Descriptor corresponding to the last TX
		 * Descriptor used by the Work Request.)
		 *
		 * The retained skb will be freed when the corresponding TX
		 * Descriptors are reclaimed after their DMAs complete.
		 * However, this could take quite a while since, in general,
		 * the hardware is set up to be lazy about sending DMA
		 * completion notifications to us and we mostly perform TX
		 * reclaims in the transmit routine.
		 *
		 * This is good for performance but means that we rely on new
		 * TX packets arriving to run the destructors of completed
		 * packets, which open up space in their sockets' send queues.
		 * Sometimes we do not get such new packets, causing TX to
		 * stall.  A single UDP transmitter is a good example of this
		 * situation.  We have a clean up timer that periodically
		 * reclaims completed packets but it doesn't run often enough
		 * (nor do we want it to) to prevent lengthy stalls.  A
		 * solution to this problem is to run the destructor early,
		 * after the packet is queued but before it's DMAed.  A con is
		 * that we lie to socket memory accounting, but the amount of
		 * extra memory is reasonable (limited by the number of TX
		 * descriptors), the packets do actually get freed quickly by
		 * new packets almost always, and for protocols like TCP that
		 * wait for ACKs to really free up the data the extra memory
		 * is even less.  On the positive side we run the destructors
		 * on the sending CPU rather than on a potentially different
		 * completing CPU, usually a good thing.
		 *
		 * Run the destructor before telling the DMA engine about the
		 * packet to make sure it doesn't complete and get freed
		 * prematurely.
		 */
		struct ulptx_sgl *sgl = (struct ulptx_sgl *)(cpl + 1);
		struct sge_txq *tq = &txq->q;
		int last_desc;

		/*
		 * If the Work Request header was an exact multiple of our TX
		 * Descriptor length, then it's possible that the starting SGL
		 * pointer lines up exactly with the end of our TX Descriptor
		 * ring.  If that's the case, wrap around to the beginning
		 * here ...
		 */
		if (unlikely((void *)sgl == (void *)tq->stat)) {
			sgl = (void *)tq->desc;
			end = (void *)((void *)tq->desc +
				       ((void *)end - (void *)tq->stat));
		}

		write_sgl(skb, tq, sgl, end, 0, addr);
		skb_orphan(skb);

		last_desc = tq->pidx + ndesc - 1;
		if (last_desc >= tq->size)
			last_desc -= tq->size;
		tq->sdesc[last_desc].skb = skb;
		tq->sdesc[last_desc].sgl = sgl;
	}

	/*
	 * Advance our internal TX Queue state, tell the hardware about
	 * the new TX descriptors and return success.
	 */
	txq_advance(&txq->q, ndesc);
	dev->trans_start = jiffies;
	ring_tx_db(adapter, &txq->q, ndesc);
	return NETDEV_TX_OK;

out_free:
	/*
	 * An error of some sort happened.  Free the TX skb and tell the
	 * OS that we've "dealt" with the packet ...
	 */
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
|  | 1350 |  | 
|  | 1351 | /** | 
|  | 1352 | *	t4vf_pktgl_free - free a packet gather list | 
|  | 1353 | *	@gl: the gather list | 
|  | 1354 | * | 
|  | 1355 | *	Releases the pages of a packet gather list.  We do not own the last | 
|  | 1356 | *	page on the list and do not free it. | 
|  | 1357 | */ | 
|  | 1358 | void t4vf_pktgl_free(const struct pkt_gl *gl) | 
|  | 1359 | { | 
|  | 1360 | int frag; | 
|  | 1361 |  | 
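|  |  | /* | 
|  |  | * Release every page except the last; we don't own the last page on | 
|  |  | * the list (see the comment above) so it is deliberately left alone. | 
|  |  | */ | 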
|  | 1362 | frag = gl->nfrags - 1; | 
|  | 1363 | while (frag--) | 
|  | 1364 | put_page(gl->frags[frag].page); | 
|  | 1365 | } | 
|  | 1366 |  | 
|  | 1367 | /** | 
|  | 1368 | *	copy_frags - copy fragments from gather list into skb_shared_info | 
|  | 1369 | *	@si: destination skb shared info structure | 
|  | 1370 | *	@gl: source internal packet gather list | 
|  | 1371 | *	@offset: packet start offset in first page | 
|  | 1372 | * | 
|  | 1373 | *	Copy an internal packet gather list into a Linux skb_shared_info | 
|  | 1374 | *	structure. | 
|  | 1375 | */ | 
|  | 1376 | static inline void copy_frags(struct skb_shared_info *si, | 
|  | 1377 | const struct pkt_gl *gl, | 
|  | 1378 | unsigned int offset) | 
|  | 1379 | { | 
|  | 1380 | unsigned int n; | 
|  | 1381 |  | 
|  | 1382 | /* usually there's just one frag */ | 
|  | 1383 | si->frags[0].page = gl->frags[0].page; | 
|  | 1384 | si->frags[0].page_offset = gl->frags[0].page_offset + offset; | 
|  | 1385 | si->frags[0].size = gl->frags[0].size - offset; | 
|  | 1386 | si->nr_frags = gl->nfrags; | 
|  | 1387 |  | 
|  | 1388 | n = gl->nfrags - 1; | 
|  | 1389 | if (n) | 
|  | 1390 | memcpy(&si->frags[1], &gl->frags[1], n * sizeof(skb_frag_t)); | 
|  | 1391 |  | 
|  | 1392 | /* get a reference to the last page, we don't own it */ | 
|  | 1393 | get_page(gl->frags[n].page); | 
|  | 1394 | } | 
|  | 1395 |  | 
|  | 1396 | /** | 
|  | 1397 | *	do_gro - perform Generic Receive Offload ingress packet processing | 
|  | 1398 | *	@rxq: ingress RX Ethernet Queue | 
|  | 1399 | *	@gl: gather list for ingress packet | 
|  | 1400 | *	@pkt: CPL header for last packet fragment | 
|  | 1401 | * | 
|  | 1402 | *	Perform Generic Receive Offload (GRO) ingress packet processing. | 
|  | 1403 | *	We use the standard Linux GRO interfaces for this. | 
|  | 1404 | */ | 
|  | 1405 | static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl, | 
|  | 1406 | const struct cpl_rx_pkt *pkt) | 
|  | 1407 | { | 
|  | 1408 | int ret; | 
|  | 1409 | struct sk_buff *skb; | 
|  | 1410 |  | 
|  | 1411 | skb = napi_get_frags(&rxq->rspq.napi); | 
|  | 1412 | if (unlikely(!skb)) { | 
|  | 1413 | t4vf_pktgl_free(gl); | 
|  | 1414 | rxq->stats.rx_drops++; | 
|  | 1415 | return; | 
|  | 1416 | } | 
|  | 1417 |  | 
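|  |  | /* | 
|  |  | * Build a fragment-only skb around the gathered pages: all of the | 
|  |  | * packet data stays in the page fragments, nothing is copied into | 
|  |  | * the skb's linear data area. | 
|  |  | */ | 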
|  | 1418 | copy_frags(skb_shinfo(skb), gl, PKTSHIFT); | 
|  | 1419 | skb->len = gl->tot_len - PKTSHIFT; | 
|  | 1420 | skb->data_len = skb->len; | 
|  | 1421 | skb->truesize += skb->data_len; | 
|  | 1422 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 
|  | 1423 | skb_record_rx_queue(skb, rxq->rspq.idx); | 
|  | 1424 |  | 
|  | 1425 | if (unlikely(pkt->vlan_ex)) { | 
|  | 1426 | struct port_info *pi = netdev_priv(rxq->rspq.netdev); | 
|  | 1427 | struct vlan_group *grp = pi->vlan_grp; | 
|  | 1428 |  | 
|  | 1429 | rxq->stats.vlan_ex++; | 
|  | 1430 | if (likely(grp)) { | 
|  | 1431 | ret = vlan_gro_frags(&rxq->rspq.napi, grp, | 
|  | 1432 | be16_to_cpu(pkt->vlan)); | 
|  | 1433 | goto stats; | 
|  | 1434 | } | 
|  | 1435 | } | 
|  | 1436 | ret = napi_gro_frags(&rxq->rspq.napi); | 
|  | 1437 |  | 
|  | 1438 | stats: | 
|  | 1439 | if (ret == GRO_HELD) | 
|  | 1440 | rxq->stats.lro_pkts++; | 
|  | 1441 | else if (ret == GRO_MERGED || ret == GRO_MERGED_FREE) | 
|  | 1442 | rxq->stats.lro_merged++; | 
|  | 1443 | rxq->stats.pkts++; | 
|  | 1444 | rxq->stats.rx_cso++; | 
|  | 1445 | } | 
|  | 1446 |  | 
|  | 1447 | /** | 
|  | 1448 | *	t4vf_ethrx_handler - process an ingress ethernet packet | 
|  | 1449 | *	@rspq: the response queue that received the packet | 
|  | 1450 | *	@rsp: the response queue descriptor holding the RX_PKT message | 
|  | 1451 | *	@gl: the gather list of packet fragments | 
|  | 1452 | * | 
|  | 1453 | *	Process an ingress ethernet packet and deliver it to the stack. | 
|  | 1454 | */ | 
|  | 1455 | int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp, | 
|  | 1456 | const struct pkt_gl *gl) | 
|  | 1457 | { | 
|  | 1458 | struct sk_buff *skb; | 
|  | 1459 | struct port_info *pi; | 
|  | 1460 | struct skb_shared_info *ssi; | 
|  | 1461 | const struct cpl_rx_pkt *pkt = (void *)&rsp[1]; | 
|  | 1462 | bool csum_ok = pkt->csum_calc && !pkt->err_vec; | 
|  | 1463 | unsigned int len = be16_to_cpu(pkt->len); | 
|  | 1464 | struct sge_eth_rxq *rxq = container_of(rspq, struct sge_eth_rxq, rspq); | 
|  | 1465 |  | 
|  | 1466 | /* | 
|  | 1467 | * If this is a good TCP packet and we have Generic Receive Offload | 
|  | 1468 | * enabled, handle the packet in the GRO path. | 
|  | 1469 | */ | 
|  | 1470 | if ((pkt->l2info & cpu_to_be32(RXF_TCP)) && | 
|  | 1471 | (rspq->netdev->features & NETIF_F_GRO) && csum_ok && | 
|  | 1472 | !pkt->ip_frag) { | 
|  | 1473 | do_gro(rxq, gl, pkt); | 
|  | 1474 | return 0; | 
|  | 1475 | } | 
|  | 1476 |  | 
|  | 1477 | /* | 
|  | 1478 | * If the ingress packet is small enough, allocate an skb large enough | 
|  | 1479 | * for all of the data and copy it inline.  Otherwise, allocate an skb | 
|  | 1480 | * with enough room to pull in the header and reference the rest of | 
|  | 1481 | * the data via the skb fragment list. | 
|  | 1482 | */ | 
|  | 1483 | if (len <= RX_COPY_THRES) { | 
|  | 1484 | /* small packets have only one fragment */ | 
|  | 1485 | skb = alloc_skb(gl->frags[0].size, GFP_ATOMIC); | 
|  | 1486 | if (!skb) | 
|  | 1487 | goto nomem; | 
|  | 1488 | __skb_put(skb, gl->frags[0].size); | 
|  | 1489 | skb_copy_to_linear_data(skb, gl->va, gl->frags[0].size); | 
|  | 1490 | } else { | 
|  | 1491 | skb = alloc_skb(RX_PKT_PULL_LEN, GFP_ATOMIC); | 
|  | 1492 | if (!skb) | 
|  | 1493 | goto nomem; | 
|  | 1494 | __skb_put(skb, RX_PKT_PULL_LEN); | 
|  | 1495 | skb_copy_to_linear_data(skb, gl->va, RX_PKT_PULL_LEN); | 
|  | 1496 |  | 
|  | 1497 | ssi = skb_shinfo(skb); | 
|  | 1498 | ssi->frags[0].page = gl->frags[0].page; | 
|  | 1499 | ssi->frags[0].page_offset = (gl->frags[0].page_offset + | 
|  | 1500 | RX_PKT_PULL_LEN); | 
|  | 1501 | ssi->frags[0].size = gl->frags[0].size - RX_PKT_PULL_LEN; | 
|  | 1502 | if (gl->nfrags > 1) | 
|  | 1503 | memcpy(&ssi->frags[1], &gl->frags[1], | 
|  | 1504 | (gl->nfrags-1) * sizeof(skb_frag_t)); | 
|  | 1505 | ssi->nr_frags = gl->nfrags; | 
|  | 1506 | skb->len = len + PKTSHIFT; | 
|  | 1507 | skb->data_len = skb->len - RX_PKT_PULL_LEN; | 
|  | 1508 | skb->truesize += skb->data_len; | 
|  | 1509 |  | 
|  | 1510 | /* Get a reference for the last page, we don't own it */ | 
|  | 1511 | get_page(gl->frags[gl->nfrags - 1].page); | 
|  | 1512 | } | 
|  | 1513 |  | 
|  | 1514 | __skb_pull(skb, PKTSHIFT); | 
|  | 1515 | skb->protocol = eth_type_trans(skb, rspq->netdev); | 
|  | 1516 | skb_record_rx_queue(skb, rspq->idx); | 
|  | 1517 | skb->dev->last_rx = jiffies;                  /* XXX removed 2.6.29 */ | 
|  | 1518 | pi = netdev_priv(skb->dev); | 
|  | 1519 | rxq->stats.pkts++; | 
|  | 1520 |  | 
|  | 1521 | if (csum_ok && (pi->rx_offload & RX_CSO) && !pkt->err_vec && | 
|  | 1522 | (be32_to_cpu(pkt->l2info) & (RXF_UDP|RXF_TCP))) { | 
|  | 1523 | if (!pkt->ip_frag) | 
|  | 1524 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 
|  | 1525 | else { | 
|  | 1526 | __sum16 c = (__force __sum16)pkt->csum; | 
|  | 1527 | skb->csum = csum_unfold(c); | 
|  | 1528 | skb->ip_summed = CHECKSUM_COMPLETE; | 
|  | 1529 | } | 
|  | 1530 | rxq->stats.rx_cso++; | 
|  | 1531 | } else | 
|  | 1532 | skb->ip_summed = CHECKSUM_NONE; | 
|  | 1533 |  | 
|  | 1534 | if (unlikely(pkt->vlan_ex)) { | 
|  | 1535 | struct vlan_group *grp = pi->vlan_grp; | 
|  | 1536 |  | 
|  | 1537 | rxq->stats.vlan_ex++; | 
|  | 1538 | if (likely(grp)) | 
|  | 1539 | vlan_hwaccel_receive_skb(skb, grp, | 
|  | 1540 | be16_to_cpu(pkt->vlan)); | 
|  | 1541 | else | 
|  | 1542 | dev_kfree_skb_any(skb); | 
|  | 1543 | } else | 
|  | 1544 | netif_receive_skb(skb); | 
|  | 1545 |  | 
|  | 1546 | return 0; | 
|  | 1547 |  | 
|  | 1548 | nomem: | 
|  | 1549 | t4vf_pktgl_free(gl); | 
|  | 1550 | rxq->stats.rx_drops++; | 
|  | 1551 | return 0; | 
|  | 1552 | } | 
|  | 1553 |  | 
|  | 1554 | /** | 
|  | 1555 | *	is_new_response - check if a response is newly written | 
|  | 1556 | *	@rc: the response control descriptor | 
|  | 1557 | *	@rspq: the response queue | 
|  | 1558 | * | 
|  | 1559 | *	Returns true if a response descriptor contains a yet unprocessed | 
|  | 1560 | *	response. | 
|  | 1561 | */ | 
|  | 1562 | static inline bool is_new_response(const struct rsp_ctrl *rc, | 
|  | 1563 | const struct sge_rspq *rspq) | 
|  | 1564 | { | 
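|  |  | /* | 
|  |  | * The hardware writes a generation bit into each response descriptor | 
|  |  | * and rspq->gen is flipped each time the queue wraps (see | 
|  |  | * rspq_next()), so entries which haven't been rewritten since the | 
|  |  | * last wrap still carry the old generation and never look "new". | 
|  |  | */ | 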
|  | 1565 | return RSPD_GEN(rc->type_gen) == rspq->gen; | 
|  | 1566 | } | 
|  | 1567 |  | 
|  | 1568 | /** | 
|  | 1569 | *	restore_rx_bufs - put back a packet's RX buffers | 
|  | 1570 | *	@gl: the packet gather list | 
|  | 1571 | *	@fl: the SGE Free List | 
|  | 1572 | *	@frags: how many fragments in @gl | 
|  | 1573 | * | 
|  | 1574 | *	Called when we find out that the current packet, @gl, can't be | 
|  | 1575 | *	processed right away for some reason.  This is a very rare event and | 
|  | 1576 | *	there's no effort to make this suspension/resumption process | 
|  | 1577 | *	particularly efficient. | 
|  | 1578 | * | 
|  | 1579 | *	We implement the suspension by putting all of the RX buffers associated | 
|  | 1580 | *	with the current packet back on the original Free List.  The buffers | 
|  | 1581 | *	have already been unmapped and are left unmapped; we mark them as | 
|  | 1582 | *	unmapped to prevent further unmapping attempts.  (Effectively | 
|  | 1583 | *	this function undoes the series of @unmap_rx_buf calls which were done | 
|  | 1584 | *	to create the current packet's gather list.)  This leaves us ready to | 
|  | 1585 | *	restart processing of the packet the next time we start processing the | 
|  | 1586 | *	RX Queue ... | 
|  | 1587 | */ | 
|  | 1588 | static void restore_rx_bufs(const struct pkt_gl *gl, struct sge_fl *fl, | 
|  | 1589 | int frags) | 
|  | 1590 | { | 
|  | 1591 | struct rx_sw_desc *sdesc; | 
|  | 1592 |  | 
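|  |  | /* | 
|  |  | * Walk the Free List's consumer index backwards, handing each page | 
|  |  | * pointer back to its software descriptor and marking the buffer as | 
|  |  | * unmapped so we never attempt to unmap it a second time. | 
|  |  | */ | 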
|  | 1593 | while (frags--) { | 
|  | 1594 | if (fl->cidx == 0) | 
|  | 1595 | fl->cidx = fl->size - 1; | 
|  | 1596 | else | 
|  | 1597 | fl->cidx--; | 
|  | 1598 | sdesc = &fl->sdesc[fl->cidx]; | 
|  | 1599 | sdesc->page = gl->frags[frags].page; | 
|  | 1600 | sdesc->dma_addr |= RX_UNMAPPED_BUF; | 
|  | 1601 | fl->avail++; | 
|  | 1602 | } | 
|  | 1603 | } | 
|  | 1604 |  | 
|  | 1605 | /** | 
|  | 1606 | *	rspq_next - advance to the next entry in a response queue | 
|  | 1607 | *	@rspq: the queue | 
|  | 1608 | * | 
|  | 1609 | *	Updates the state of a response queue to advance it to the next entry. | 
|  | 1610 | */ | 
|  | 1611 | static inline void rspq_next(struct sge_rspq *rspq) | 
|  | 1612 | { | 
|  | 1613 | rspq->cur_desc = (void *)rspq->cur_desc + rspq->iqe_len; | 
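|  |  | /* | 
|  |  | * When the consumer index wraps past the end of the ring, go back | 
|  |  | * to the start and flip the generation bit so stale entries no | 
|  |  | * longer match rspq->gen. | 
|  |  | */ | 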
|  | 1614 | if (unlikely(++rspq->cidx == rspq->size)) { | 
|  | 1615 | rspq->cidx = 0; | 
|  | 1616 | rspq->gen ^= 1; | 
|  | 1617 | rspq->cur_desc = rspq->desc; | 
|  | 1618 | } | 
|  | 1619 | } | 
|  | 1620 |  | 
|  | 1621 | /** | 
|  | 1622 | *	process_responses - process responses from an SGE response queue | 
|  | 1623 | *	@rspq: the ingress response queue to process | 
|  | 1624 | *	@budget: how many responses can be processed in this round | 
|  | 1625 | * | 
|  | 1626 | *	Process responses from a Scatter Gather Engine response queue up to | 
|  | 1627 | *	the supplied budget.  Responses include received packets as well as | 
|  | 1628 | *	control messages from firmware or hardware. | 
|  | 1629 | * | 
|  | 1630 | *	Additionally choose the interrupt holdoff time for the next interrupt | 
|  | 1631 | *	on this queue.  If the system is under memory shortage, use a fairly | 
|  | 1632 | *	long delay to help recovery. | 
|  | 1633 | */ | 
|  | 1634 | int process_responses(struct sge_rspq *rspq, int budget) | 
|  | 1635 | { | 
|  | 1636 | struct sge_eth_rxq *rxq = container_of(rspq, struct sge_eth_rxq, rspq); | 
|  | 1637 | int budget_left = budget; | 
|  | 1638 |  | 
|  | 1639 | while (likely(budget_left)) { | 
|  | 1640 | int ret, rsp_type; | 
|  | 1641 | const struct rsp_ctrl *rc; | 
|  | 1642 |  | 
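|  |  | /* | 
|  |  | * The response control block occupies the last few bytes of each | 
|  |  | * iqe_len-sized response descriptor. | 
|  |  | */ | 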
|  | 1643 | rc = (void *)rspq->cur_desc + (rspq->iqe_len - sizeof(*rc)); | 
|  | 1644 | if (!is_new_response(rc, rspq)) | 
|  | 1645 | break; | 
|  | 1646 |  | 
|  | 1647 | /* | 
|  | 1648 | * Figure out what kind of response we've received from the SGE; | 
|  | 1649 | * the rmb() orders these reads after the generation bit check above. | 
|  | 1650 | */ | 
|  | 1651 | rmb(); | 
|  | 1652 | rsp_type = RSPD_TYPE(rc->type_gen); | 
|  | 1653 | if (likely(rsp_type == RSP_TYPE_FLBUF)) { | 
|  | 1654 | skb_frag_t *fp; | 
|  | 1655 | struct pkt_gl gl; | 
|  | 1656 | const struct rx_sw_desc *sdesc; | 
|  | 1657 | u32 bufsz, frag; | 
|  | 1658 | u32 len = be32_to_cpu(rc->pldbuflen_qid); | 
|  | 1659 |  | 
|  | 1660 | /* | 
|  | 1661 | * If we get a "new buffer" message from the SGE we | 
|  | 1662 | * need to move on to the next Free List buffer. | 
|  | 1663 | */ | 
|  | 1664 | if (len & RSPD_NEWBUF) { | 
|  | 1665 | /* | 
|  | 1666 | * We get one "new buffer" message when we | 
|  | 1667 | * first start up a queue so we need to ignore | 
|  | 1668 | * it when our offset into the buffer is 0. | 
|  | 1669 | */ | 
|  | 1670 | if (likely(rspq->offset > 0)) { | 
|  | 1671 | free_rx_bufs(rspq->adapter, &rxq->fl, | 
|  | 1672 | 1); | 
|  | 1673 | rspq->offset = 0; | 
|  | 1674 | } | 
|  | 1675 | len = RSPD_LEN(len); | 
|  | 1676 | } | 
|  | 1677 |  | 
|  | 1678 | /* | 
|  | 1679 | * Gather packet fragments. | 
|  | 1680 | */ | 
|  | 1681 | for (frag = 0, fp = gl.frags; /**/; frag++, fp++) { | 
|  | 1682 | BUG_ON(frag >= MAX_SKB_FRAGS); | 
|  | 1683 | BUG_ON(rxq->fl.avail == 0); | 
|  | 1684 | sdesc = &rxq->fl.sdesc[rxq->fl.cidx]; | 
|  | 1685 | bufsz = get_buf_size(sdesc); | 
|  | 1686 | fp->page = sdesc->page; | 
|  | 1687 | fp->page_offset = rspq->offset; | 
|  | 1688 | fp->size = min(bufsz, len); | 
|  | 1689 | len -= fp->size; | 
|  | 1690 | if (!len) | 
|  | 1691 | break; | 
|  | 1692 | unmap_rx_buf(rspq->adapter, &rxq->fl); | 
|  | 1693 | } | 
|  | 1694 | gl.nfrags = frag+1; | 
|  | 1695 |  | 
|  | 1696 | /* | 
|  | 1697 | * Last buffer remains mapped so explicitly make it | 
|  | 1698 | * coherent for CPU access and start preloading first | 
|  | 1699 | * cache line ... | 
|  | 1700 | */ | 
|  | 1701 | dma_sync_single_for_cpu(rspq->adapter->pdev_dev, | 
|  | 1702 | get_buf_addr(sdesc), | 
|  | 1703 | fp->size, DMA_FROM_DEVICE); | 
|  | 1704 | gl.va = (page_address(gl.frags[0].page) + | 
|  | 1705 | gl.frags[0].page_offset); | 
|  | 1706 | prefetch(gl.va); | 
|  | 1707 |  | 
|  | 1708 | /* | 
|  | 1709 | * Hand the new ingress packet to the handler for | 
|  | 1710 | * this Response Queue. | 
|  | 1711 | */ | 
|  | 1712 | ret = rspq->handler(rspq, rspq->cur_desc, &gl); | 
|  | 1713 | if (likely(ret == 0)) | 
|  | 1714 | rspq->offset += ALIGN(fp->size, FL_ALIGN); | 
|  | 1715 | else | 
|  | 1716 | restore_rx_bufs(&gl, &rxq->fl, frag); | 
|  | 1717 | } else if (likely(rsp_type == RSP_TYPE_CPL)) { | 
|  | 1718 | ret = rspq->handler(rspq, rspq->cur_desc, NULL); | 
|  | 1719 | } else { | 
|  | 1720 | WARN_ON(rsp_type > RSP_TYPE_CPL); | 
|  | 1721 | ret = 0; | 
|  | 1722 | } | 
|  | 1723 |  | 
|  | 1724 | if (unlikely(ret)) { | 
|  | 1725 | /* | 
|  | 1726 | * Couldn't process descriptor, back off for recovery. | 
|  | 1727 | * We use the SGE's last timer which has the longest | 
|  | 1728 | * interrupt coalescing value ... | 
|  | 1729 | */ | 
|  | 1730 | const int NOMEM_TIMER_IDX = SGE_NTIMERS-1; | 
|  | 1731 | rspq->next_intr_params = | 
|  | 1732 | QINTR_TIMER_IDX(NOMEM_TIMER_IDX); | 
|  | 1733 | break; | 
|  | 1734 | } | 
|  | 1735 |  | 
|  | 1736 | rspq_next(rspq); | 
|  | 1737 | budget_left--; | 
|  | 1738 | } | 
|  | 1739 |  | 
|  | 1740 | /* | 
|  | 1741 | * If this is a Response Queue with an associated Free List and | 
|  | 1742 | * at least two Egress Queue units available in the Free List | 
|  | 1743 | * for new buffer pointers, refill the Free List. | 
|  | 1744 | */ | 
|  | 1745 | if (rspq->offset >= 0 && | 
|  | 1746 | rxq->fl.size - rxq->fl.avail >= 2*FL_PER_EQ_UNIT) | 
|  | 1747 | __refill_fl(rspq->adapter, &rxq->fl); | 
|  | 1748 | return budget - budget_left; | 
|  | 1749 | } | 
|  | 1750 |  | 
|  | 1751 | /** | 
|  | 1752 | *	napi_rx_handler - the NAPI handler for RX processing | 
|  | 1753 | *	@napi: the napi instance | 
|  | 1754 | *	@budget: how many packets we can process in this round | 
|  | 1755 | * | 
|  | 1756 | *	Handler for new data events when using NAPI.  This does not need any | 
|  | 1757 | *	locking or protection from interrupts as data interrupts are off at | 
|  | 1758 | *	this point and other adapter interrupts do not interfere (the latter | 
|  | 1759 | *	is not a concern at all with MSI-X as non-data interrupts then have | 
|  | 1760 | *	a separate handler). | 
|  | 1761 | */ | 
|  | 1762 | static int napi_rx_handler(struct napi_struct *napi, int budget) | 
|  | 1763 | { | 
|  | 1764 | unsigned int intr_params; | 
|  | 1765 | struct sge_rspq *rspq = container_of(napi, struct sge_rspq, napi); | 
|  | 1766 | int work_done = process_responses(rspq, budget); | 
|  | 1767 |  | 
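|  |  | /* | 
|  |  | * If we used less than our budget, the queue is quiet for now: exit | 
|  |  | * NAPI polling and re-arm the interrupt using whatever holdoff | 
|  |  | * parameters processing selected (e.g. the long "no memory" timer), | 
|  |  | * then reset that selection to the queue's default.  If we used the | 
|  |  | * whole budget, stay in polling mode and just update the hardware's | 
|  |  | * copy of our consumer index. | 
|  |  | */ | 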
|  | 1768 | if (likely(work_done < budget)) { | 
|  | 1769 | napi_complete(napi); | 
|  | 1770 | intr_params = rspq->next_intr_params; | 
|  | 1771 | rspq->next_intr_params = rspq->intr_params; | 
|  | 1772 | } else | 
|  | 1773 | intr_params = QINTR_TIMER_IDX(SGE_TIMER_UPD_CIDX); | 
|  | 1774 |  | 
|  | 1775 | t4_write_reg(rspq->adapter, | 
|  | 1776 | T4VF_SGE_BASE_ADDR + SGE_VF_GTS, | 
|  | 1777 | CIDXINC(work_done) | | 
|  | 1778 | INGRESSQID((u32)rspq->cntxt_id) | | 
|  | 1779 | SEINTARM(intr_params)); | 
|  | 1780 | return work_done; | 
|  | 1781 | } | 
|  | 1782 |  | 
|  | 1783 | /* | 
|  | 1784 | * The MSI-X interrupt handler for an SGE response queue for the NAPI case | 
|  | 1785 | * (i.e., response queue serviced by NAPI polling). | 
|  | 1786 | */ | 
|  | 1787 | irqreturn_t t4vf_sge_intr_msix(int irq, void *cookie) | 
|  | 1788 | { | 
|  | 1789 | struct sge_rspq *rspq = cookie; | 
|  | 1790 |  | 
|  | 1791 | napi_schedule(&rspq->napi); | 
|  | 1792 | return IRQ_HANDLED; | 
|  | 1793 | } | 
|  | 1794 |  | 
|  | 1795 | /* | 
|  | 1796 | * Process the indirect interrupt entries in the interrupt queue and kick off | 
|  | 1797 | * NAPI for each queue that has generated an entry. | 
|  | 1798 | */ | 
|  | 1799 | static unsigned int process_intrq(struct adapter *adapter) | 
|  | 1800 | { | 
|  | 1801 | struct sge *s = &adapter->sge; | 
|  | 1802 | struct sge_rspq *intrq = &s->intrq; | 
|  | 1803 | unsigned int work_done; | 
|  | 1804 |  | 
|  | 1805 | spin_lock(&adapter->sge.intrq_lock); | 
|  | 1806 | for (work_done = 0; ; work_done++) { | 
|  | 1807 | const struct rsp_ctrl *rc; | 
|  | 1808 | unsigned int qid, iq_idx; | 
|  | 1809 | struct sge_rspq *rspq; | 
|  | 1810 |  | 
|  | 1811 | /* | 
|  | 1812 | * Grab the next response from the interrupt queue and bail | 
|  | 1813 | * out if it's not a new response. | 
|  | 1814 | */ | 
|  | 1815 | rc = (void *)intrq->cur_desc + (intrq->iqe_len - sizeof(*rc)); | 
|  | 1816 | if (!is_new_response(rc, intrq)) | 
|  | 1817 | break; | 
|  | 1818 |  | 
|  | 1819 | /* | 
|  | 1820 | * If the response isn't a forwarded interrupt message, issue an | 
|  | 1821 | * error and go on to the next response message.  This should | 
|  | 1822 | * never happen ... | 
|  | 1823 | */ | 
|  | 1824 | rmb(); | 
|  | 1825 | if (unlikely(RSPD_TYPE(rc->type_gen) != RSP_TYPE_INTR)) { | 
|  | 1826 | dev_err(adapter->pdev_dev, | 
|  | 1827 | "Unexpected INTRQ response type %d\n", | 
|  | 1828 | RSPD_TYPE(rc->type_gen)); | 
|  | 1829 | continue; | 
|  | 1830 | } | 
|  | 1831 |  | 
|  | 1832 | /* | 
|  | 1833 | * Extract the Queue ID from the interrupt message and perform | 
|  | 1834 | * sanity checking to make sure it really refers to one of our | 
|  | 1835 | * Ingress Queues which is active and matches the queue's ID. | 
|  | 1836 | * None of these error conditions should ever happen, so we may | 
|  | 1837 | * want to make them fatal and/or conditional under | 
|  | 1838 | * DEBUG. | 
|  | 1839 | */ | 
|  | 1840 | qid = RSPD_QID(be32_to_cpu(rc->pldbuflen_qid)); | 
|  | 1841 | iq_idx = IQ_IDX(s, qid); | 
|  | 1842 | if (unlikely(iq_idx >= MAX_INGQ)) { | 
|  | 1843 | dev_err(adapter->pdev_dev, | 
|  | 1844 | "Ingress QID %d out of range\n", qid); | 
|  | 1845 | continue; | 
|  | 1846 | } | 
|  | 1847 | rspq = s->ingr_map[iq_idx]; | 
|  | 1848 | if (unlikely(rspq == NULL)) { | 
|  | 1849 | dev_err(adapter->pdev_dev, | 
|  | 1850 | "Ingress QID %d RSPQ=NULL\n", qid); | 
|  | 1851 | continue; | 
|  | 1852 | } | 
|  | 1853 | if (unlikely(rspq->abs_id != qid)) { | 
|  | 1854 | dev_err(adapter->pdev_dev, | 
|  | 1855 | "Ingress QID %d refers to RSPQ %d\n", | 
|  | 1856 | qid, rspq->abs_id); | 
|  | 1857 | continue; | 
|  | 1858 | } | 
|  | 1859 |  | 
|  | 1860 | /* | 
|  | 1861 | * Schedule NAPI processing on the indicated Response Queue | 
|  | 1862 | * and move on to the next entry in the Forwarded Interrupt | 
|  | 1863 | * Queue. | 
|  | 1864 | */ | 
|  | 1865 | napi_schedule(&rspq->napi); | 
|  | 1866 | rspq_next(intrq); | 
|  | 1867 | } | 
|  | 1868 |  | 
|  | 1869 | t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS, | 
|  | 1870 | CIDXINC(work_done) | | 
|  | 1871 | INGRESSQID(intrq->cntxt_id) | | 
|  | 1872 | SEINTARM(intrq->intr_params)); | 
|  | 1873 |  | 
|  | 1874 | spin_unlock(&adapter->sge.intrq_lock); | 
|  | 1875 |  | 
|  | 1876 | return work_done; | 
|  | 1877 | } | 
|  | 1878 |  | 
|  | 1879 | /* | 
|  | 1880 | * The MSI interrupt handler handles data events from SGE response queues as | 
|  | 1881 | * well as error and other async events as they all use the same MSI vector. | 
|  | 1882 | */ | 
|  | 1883 | irqreturn_t t4vf_intr_msi(int irq, void *cookie) | 
|  | 1884 | { | 
|  | 1885 | struct adapter *adapter = cookie; | 
|  | 1886 |  | 
|  | 1887 | process_intrq(adapter); | 
|  | 1888 | return IRQ_HANDLED; | 
|  | 1889 | } | 
|  | 1890 |  | 
|  | 1891 | /** | 
|  | 1892 | *	t4vf_intr_handler - select the top-level interrupt handler | 
|  | 1893 | *	@adapter: the adapter | 
|  | 1894 | * | 
|  | 1895 | *	Selects the top-level interrupt handler based on the type of interrupts | 
|  | 1896 | *	(MSI-X or MSI). | 
|  | 1897 | */ | 
|  | 1898 | irq_handler_t t4vf_intr_handler(struct adapter *adapter) | 
|  | 1899 | { | 
|  | 1900 | BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0); | 
|  | 1901 | if (adapter->flags & USING_MSIX) | 
|  | 1902 | return t4vf_sge_intr_msix; | 
|  | 1903 | else | 
|  | 1904 | return t4vf_intr_msi; | 
|  | 1905 | } | 
|  | 1906 |  | 
|  | 1907 | /** | 
|  | 1908 | *	sge_rx_timer_cb - perform periodic maintenance of SGE RX queues | 
|  | 1909 | *	@data: the adapter | 
|  | 1910 | * | 
|  | 1911 | *	Runs periodically from a timer to perform maintenance of SGE RX queues. | 
|  | 1912 | * | 
|  | 1913 | *	a) Replenishes RX queues that have run out due to memory shortage. | 
|  | 1914 | *	Normally new RX buffers are added when existing ones are consumed but | 
|  | 1915 | *	when out of memory a queue can become empty.  We schedule NAPI to do | 
|  | 1916 | *	the actual refill. | 
|  | 1917 | */ | 
|  | 1918 | static void sge_rx_timer_cb(unsigned long data) | 
|  | 1919 | { | 
|  | 1920 | struct adapter *adapter = (struct adapter *)data; | 
|  | 1921 | struct sge *s = &adapter->sge; | 
|  | 1922 | unsigned int i; | 
|  | 1923 |  | 
|  | 1924 | /* | 
|  | 1925 | * Scan the "Starving Free Lists" flag array looking for any Free | 
|  | 1926 | * Lists in need of more free buffers.  If we find one and it's not | 
|  | 1927 | * being actively polled, then bump its "starving" counter and attempt | 
|  | 1928 | * to refill it.  If we're successful in adding enough buffers to push | 
|  | 1929 | * the Free List over the starving threshold, then we can clear its | 
|  | 1930 | * "starving" status. | 
|  | 1931 | */ | 
|  | 1932 | for (i = 0; i < ARRAY_SIZE(s->starving_fl); i++) { | 
|  | 1933 | unsigned long m; | 
|  | 1934 |  | 
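|  |  | /* | 
|  |  | * "m &= m - 1" clears the lowest set bit on each iteration, so this | 
|  |  | * loop visits exactly those Free Lists whose "starving" bit is set | 
|  |  | * in the current word of the bitmap. | 
|  |  | */ | 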
|  | 1935 | for (m = s->starving_fl[i]; m; m &= m - 1) { | 
|  | 1936 | unsigned int id = __ffs(m) + i * BITS_PER_LONG; | 
|  | 1937 | struct sge_fl *fl = s->egr_map[id]; | 
|  | 1938 |  | 
|  | 1939 | clear_bit(id, s->starving_fl); | 
|  | 1940 | smp_mb__after_clear_bit(); | 
|  | 1941 |  | 
|  | 1942 | /* | 
|  | 1943 | * Since we are accessing fl without a lock there's a | 
|  | 1944 | * small probability of a false positive where we | 
|  | 1945 | * schedule napi but the FL is no longer starving. | 
|  | 1946 | * No biggie. | 
|  | 1947 | */ | 
|  | 1948 | if (fl_starving(fl)) { | 
|  | 1949 | struct sge_eth_rxq *rxq; | 
|  | 1950 |  | 
|  | 1951 | rxq = container_of(fl, struct sge_eth_rxq, fl); | 
|  | 1952 | if (napi_reschedule(&rxq->rspq.napi)) | 
|  | 1953 | fl->starving++; | 
|  | 1954 | else | 
|  | 1955 | set_bit(id, s->starving_fl); | 
|  | 1956 | } | 
|  | 1957 | } | 
|  | 1958 | } | 
|  | 1959 |  | 
|  | 1960 | /* | 
|  | 1961 | * Reschedule the next scan for starving Free Lists ... | 
|  | 1962 | */ | 
|  | 1963 | mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD); | 
|  | 1964 | } | 
|  | 1965 |  | 
|  | 1966 | /** | 
|  | 1967 | *	sge_tx_timer_cb - perform periodic maintenance of SGE Tx queues | 
|  | 1968 | *	@data: the adapter | 
|  | 1969 | * | 
|  | 1970 | *	Runs periodically from a timer to perform maintenance of SGE TX queues. | 
|  | 1971 | * | 
|  | 1972 | *	b) Reclaims completed Tx packets for the Ethernet queues.  Normally | 
|  | 1973 | *	packets are cleaned up by new Tx packets, this timer cleans up packets | 
|  | 1974 | *	when no new packets are being submitted.  This is essential for pktgen, | 
|  | 1975 | *	at least. | 
|  | 1976 | */ | 
|  | 1977 | static void sge_tx_timer_cb(unsigned long data) | 
|  | 1978 | { | 
|  | 1979 | struct adapter *adapter = (struct adapter *)data; | 
|  | 1980 | struct sge *s = &adapter->sge; | 
|  | 1981 | unsigned int i, budget; | 
|  | 1982 |  | 
|  | 1983 | budget = MAX_TIMER_TX_RECLAIM; | 
|  | 1984 | i = s->ethtxq_rover; | 
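|  |  | /* | 
|  |  | * Round-robin over the Ethernet TX Queues, starting where the last | 
|  |  | * timer run left off, reclaiming completed TX Descriptors until we | 
|  |  | * exhaust our budget or come back around to the starting queue. | 
|  |  | */ | 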
|  | 1985 | do { | 
|  | 1986 | struct sge_eth_txq *txq = &s->ethtxq[i]; | 
|  | 1987 |  | 
|  | 1988 | if (reclaimable(&txq->q) && __netif_tx_trylock(txq->txq)) { | 
|  | 1989 | int avail = reclaimable(&txq->q); | 
|  | 1990 |  | 
|  | 1991 | if (avail > budget) | 
|  | 1992 | avail = budget; | 
|  | 1993 |  | 
|  | 1994 | free_tx_desc(adapter, &txq->q, avail, true); | 
|  | 1995 | txq->q.in_use -= avail; | 
|  | 1996 | __netif_tx_unlock(txq->txq); | 
|  | 1997 |  | 
|  | 1998 | budget -= avail; | 
|  | 1999 | if (!budget) | 
|  | 2000 | break; | 
|  | 2001 | } | 
|  | 2002 |  | 
|  | 2003 | i++; | 
|  | 2004 | if (i >= s->ethqsets) | 
|  | 2005 | i = 0; | 
|  | 2006 | } while (i != s->ethtxq_rover); | 
|  | 2007 | s->ethtxq_rover = i; | 
|  | 2008 |  | 
|  | 2009 | /* | 
|  | 2010 | * If we found too many reclaimable packets, schedule a timer in the | 
|  | 2011 | * near future to continue where we left off.  Otherwise the next timer | 
|  | 2012 | * will be at its normal interval. | 
|  | 2013 | */ | 
|  | 2014 | mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2)); | 
|  | 2015 | } | 
|  | 2016 |  | 
|  | 2017 | /** | 
|  | 2018 | *	t4vf_sge_alloc_rxq - allocate an SGE RX Queue | 
|  | 2019 | *	@adapter: the adapter | 
|  | 2020 | *	@rspq: pointer to the new rxq's Response Queue to be filled in | 
|  | 2021 | *	@iqasynch: if 0, a normal rspq; if 1, an asynchronous event queue | 
|  | 2022 | *	@dev: the network device associated with the new rspq | 
|  | 2023 | *	@intr_dest: MSI-X vector index (overridden in MSI mode) | 
|  | 2024 | *	@fl: pointer to the new rxq's Free List to be filled in | 
|  | 2025 | *	@hnd: the response handler to invoke for the rspq | 
|  | 2026 | */ | 
|  | 2027 | int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, | 
|  | 2028 | bool iqasynch, struct net_device *dev, | 
|  | 2029 | int intr_dest, | 
|  | 2030 | struct sge_fl *fl, rspq_handler_t hnd) | 
|  | 2031 | { | 
|  | 2032 | struct port_info *pi = netdev_priv(dev); | 
|  | 2033 | struct fw_iq_cmd cmd, rpl; | 
|  | 2034 | int ret, iqandst, flsz = 0; | 
|  | 2035 |  | 
|  | 2036 | /* | 
|  | 2037 | * If we're using MSI interrupts and we're not initializing the | 
|  | 2038 | * Forwarded Interrupt Queue itself, then set up this queue for | 
|  | 2039 | * indirect interrupts to the Forwarded Interrupt Queue.  Obviously | 
|  | 2040 | * the Forwarded Interrupt Queue must be set up before any other | 
|  | 2041 | * ingress queue ... | 
|  | 2042 | */ | 
|  | 2043 | if ((adapter->flags & USING_MSI) && rspq != &adapter->sge.intrq) { | 
|  | 2044 | iqandst = SGE_INTRDST_IQ; | 
|  | 2045 | intr_dest = adapter->sge.intrq.abs_id; | 
|  | 2046 | } else | 
|  | 2047 | iqandst = SGE_INTRDST_PCI; | 
|  | 2048 |  | 
|  | 2049 | /* | 
|  | 2050 | * Allocate the hardware ring for the Response Queue.  The size needs | 
|  | 2051 | * to be a multiple of 16 which includes the mandatory status entry | 
|  | 2052 | * (regardless of whether the Status Page capabilities are enabled or | 
|  | 2053 | * not). | 
|  | 2054 | */ | 
|  | 2055 | rspq->size = roundup(rspq->size, 16); | 
|  | 2056 | rspq->desc = alloc_ring(adapter->pdev_dev, rspq->size, rspq->iqe_len, | 
|  | 2057 | 0, &rspq->phys_addr, NULL, 0); | 
|  | 2058 | if (!rspq->desc) | 
|  | 2059 | return -ENOMEM; | 
|  | 2060 |  | 
|  | 2061 | /* | 
|  | 2062 | * Fill in the Ingress Queue Command.  Note: Ideally this code would | 
|  | 2063 | * be in t4vf_hw.c but there are so many parameters and dependencies | 
|  | 2064 | * on our Linux SGE state that we would end up having to pass tons of | 
|  | 2065 | * parameters.  We'll have to think about how this might be migrated | 
|  | 2066 | * into OS-independent common code ... | 
|  | 2067 | */ | 
|  | 2068 | memset(&cmd, 0, sizeof(cmd)); | 
|  | 2069 | cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP(FW_IQ_CMD) | | 
|  | 2070 | FW_CMD_REQUEST | | 
|  | 2071 | FW_CMD_WRITE | | 
|  | 2072 | FW_CMD_EXEC); | 
|  | 2073 | cmd.alloc_to_len16 = cpu_to_be32(FW_IQ_CMD_ALLOC | | 
|  | 2074 | FW_IQ_CMD_IQSTART(1) | | 
|  | 2075 | FW_LEN16(cmd)); | 
|  | 2076 | cmd.type_to_iqandstindex = | 
|  | 2077 | cpu_to_be32(FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | | 
|  | 2078 | FW_IQ_CMD_IQASYNCH(iqasynch) | | 
|  | 2079 | FW_IQ_CMD_VIID(pi->viid) | | 
|  | 2080 | FW_IQ_CMD_IQANDST(iqandst) | | 
|  | 2081 | FW_IQ_CMD_IQANUS(1) | | 
|  | 2082 | FW_IQ_CMD_IQANUD(SGE_UPDATEDEL_INTR) | | 
|  | 2083 | FW_IQ_CMD_IQANDSTINDEX(intr_dest)); | 
|  | 2084 | cmd.iqdroprss_to_iqesize = | 
|  | 2085 | cpu_to_be16(FW_IQ_CMD_IQPCIECH(pi->port_id) | | 
|  | 2086 | FW_IQ_CMD_IQGTSMODE | | 
|  | 2087 | FW_IQ_CMD_IQINTCNTTHRESH(rspq->pktcnt_idx) | | 
|  | 2088 | FW_IQ_CMD_IQESIZE(ilog2(rspq->iqe_len) - 4)); | 
|  | 2089 | cmd.iqsize = cpu_to_be16(rspq->size); | 
|  | 2090 | cmd.iqaddr = cpu_to_be64(rspq->phys_addr); | 
|  | 2091 |  | 
|  | 2092 | if (fl) { | 
|  | 2093 | /* | 
|  | 2094 | * Allocate the ring for the hardware free list (with space | 
|  | 2095 | * for its status page) along with the associated software | 
|  | 2096 | * descriptor ring.  The free list size needs to be a multiple | 
|  | 2097 | * of the Egress Queue Unit. | 
|  | 2098 | */ | 
|  | 2099 | fl->size = roundup(fl->size, FL_PER_EQ_UNIT); | 
|  | 2100 | fl->desc = alloc_ring(adapter->pdev_dev, fl->size, | 
|  | 2101 | sizeof(__be64), sizeof(struct rx_sw_desc), | 
|  | 2102 | &fl->addr, &fl->sdesc, STAT_LEN); | 
|  | 2103 | if (!fl->desc) { | 
|  | 2104 | ret = -ENOMEM; | 
|  | 2105 | goto err; | 
|  | 2106 | } | 
|  | 2107 |  | 
|  | 2108 | /* | 
|  | 2109 | * Calculate the size of the hardware free list ring plus | 
|  | 2110 | * status page (which the SGE will place at the end of the | 
|  | 2111 | * free list ring) in Egress Queue Units. | 
|  | 2112 | */ | 
|  | 2113 | flsz = (fl->size / FL_PER_EQ_UNIT + | 
|  | 2114 | STAT_LEN / EQ_UNIT); | 
|  | 2115 |  | 
|  | 2116 | /* | 
|  | 2117 | * Fill in all the relevant firmware Ingress Queue Command | 
|  | 2118 | * fields for the free list. | 
|  | 2119 | */ | 
|  | 2120 | cmd.iqns_to_fl0congen = | 
|  | 2121 | cpu_to_be32( | 
|  | 2122 | FW_IQ_CMD_FL0HOSTFCMODE(SGE_HOSTFCMODE_NONE) | | 
|  | 2123 | FW_IQ_CMD_FL0PACKEN | | 
|  | 2124 | FW_IQ_CMD_FL0PADEN); | 
|  | 2125 | cmd.fl0dcaen_to_fl0cidxfthresh = | 
|  | 2126 | cpu_to_be16( | 
|  | 2127 | FW_IQ_CMD_FL0FBMIN(SGE_FETCHBURSTMIN_64B) | | 
|  | 2128 | FW_IQ_CMD_FL0FBMAX(SGE_FETCHBURSTMAX_512B)); | 
|  | 2129 | cmd.fl0size = cpu_to_be16(flsz); | 
|  | 2130 | cmd.fl0addr = cpu_to_be64(fl->addr); | 
|  | 2131 | } | 
|  | 2132 |  | 
|  | 2133 | /* | 
|  | 2134 | * Issue the firmware Ingress Queue Command and extract the results if | 
|  | 2135 | * it completes successfully. | 
|  | 2136 | */ | 
|  | 2137 | ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &rpl); | 
|  | 2138 | if (ret) | 
|  | 2139 | goto err; | 
|  | 2140 |  | 
|  | 2141 | netif_napi_add(dev, &rspq->napi, napi_rx_handler, 64); | 
|  | 2142 | rspq->cur_desc = rspq->desc; | 
|  | 2143 | rspq->cidx = 0; | 
|  | 2144 | rspq->gen = 1; | 
|  | 2145 | rspq->next_intr_params = rspq->intr_params; | 
|  | 2146 | rspq->cntxt_id = be16_to_cpu(rpl.iqid); | 
|  | 2147 | rspq->abs_id = be16_to_cpu(rpl.physiqid); | 
|  | 2148 | rspq->size--;			/* subtract status entry */ | 
|  | 2149 | rspq->adapter = adapter; | 
|  | 2150 | rspq->netdev = dev; | 
|  | 2151 | rspq->handler = hnd; | 
|  | 2152 |  | 
|  | 2153 | /* set offset to -1 to distinguish ingress queues without FL */ | 
|  | 2154 | rspq->offset = fl ? 0 : -1; | 
|  | 2155 |  | 
|  | 2156 | if (fl) { | 
|  | 2157 | fl->cntxt_id = be16_to_cpu(rpl.fl0id); | 
|  | 2158 | fl->avail = 0; | 
|  | 2159 | fl->pend_cred = 0; | 
|  | 2160 | fl->pidx = 0; | 
|  | 2161 | fl->cidx = 0; | 
|  | 2162 | fl->alloc_failed = 0; | 
|  | 2163 | fl->large_alloc_failed = 0; | 
|  | 2164 | fl->starving = 0; | 
|  | 2165 | refill_fl(adapter, fl, fl_cap(fl), GFP_KERNEL); | 
|  | 2166 | } | 
|  | 2167 |  | 
|  | 2168 | return 0; | 
|  | 2169 |  | 
|  | 2170 | err: | 
|  | 2171 | /* | 
|  | 2172 | * An error occurred.  Clean up our partial allocation state and | 
|  | 2173 | * return the error. | 
|  | 2174 | */ | 
|  | 2175 | if (rspq->desc) { | 
|  | 2176 | dma_free_coherent(adapter->pdev_dev, rspq->size * rspq->iqe_len, | 
|  | 2177 | rspq->desc, rspq->phys_addr); | 
|  | 2178 | rspq->desc = NULL; | 
|  | 2179 | } | 
|  | 2180 | if (fl && fl->desc) { | 
|  | 2181 | kfree(fl->sdesc); | 
|  | 2182 | fl->sdesc = NULL; | 
|  | 2183 | dma_free_coherent(adapter->pdev_dev, flsz * EQ_UNIT, | 
|  | 2184 | fl->desc, fl->addr); | 
|  | 2185 | fl->desc = NULL; | 
|  | 2186 | } | 
|  | 2187 | return ret; | 
|  | 2188 | } | 
|  | 2189 |  | 
|  | 2190 | /** | 
|  | 2191 | *	t4vf_sge_alloc_eth_txq - allocate an SGE Ethernet TX Queue | 
|  | 2192 | *	@adapter: the adapter | 
|  | 2193 | *	@txq: pointer to the new txq to be filled in | 
|  |  | *	@dev: the network device associated with the new txq | 
|  | 2194 | *	@devq: the network TX queue associated with the new txq | 
|  | 2195 | *	@iqid: the relative ingress queue ID to which events relating to | 
|  | 2196 | *		the new txq should be directed | 
|  | 2197 | */ | 
|  | 2198 | int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq, | 
|  | 2199 | struct net_device *dev, struct netdev_queue *devq, | 
|  | 2200 | unsigned int iqid) | 
|  | 2201 | { | 
|  | 2202 | int ret, nentries; | 
|  | 2203 | struct fw_eq_eth_cmd cmd, rpl; | 
|  | 2204 | struct port_info *pi = netdev_priv(dev); | 
|  | 2205 |  | 
|  | 2206 | /* | 
|  | 2207 | * Calculate the size of the hardware TX Queue (including the | 
|  | 2208 | * status page at the end) in units of TX Descriptors. | 
|  | 2209 | */ | 
|  | 2210 | nentries = txq->q.size + STAT_LEN / sizeof(struct tx_desc); | 
|  | 2211 |  | 
|  | 2212 | /* | 
|  | 2213 | * Allocate the hardware ring for the TX ring (with space for its | 
|  | 2214 | * status page) along with the associated software descriptor ring. | 
|  | 2215 | */ | 
|  | 2216 | txq->q.desc = alloc_ring(adapter->pdev_dev, txq->q.size, | 
|  | 2217 | sizeof(struct tx_desc), | 
|  | 2218 | sizeof(struct tx_sw_desc), | 
|  | 2219 | &txq->q.phys_addr, &txq->q.sdesc, STAT_LEN); | 
|  | 2220 | if (!txq->q.desc) | 
|  | 2221 | return -ENOMEM; | 
|  | 2222 |  | 
|  | 2223 | /* | 
|  | 2224 | * Fill in the Egress Queue Command.  Note: As with the direct use of | 
|  | 2225 | * the firmware Ingress Queue Command above in our RXQ allocation | 
|  | 2226 | * routine, ideally, this code would be in t4vf_hw.c.  Again, we'll | 
|  | 2227 | * have to see if there's some reasonable way to parameterize it | 
|  | 2228 | * into the common code ... | 
|  | 2229 | */ | 
|  | 2230 | memset(&cmd, 0, sizeof(cmd)); | 
|  | 2231 | cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP(FW_EQ_ETH_CMD) | | 
|  | 2232 | FW_CMD_REQUEST | | 
|  | 2233 | FW_CMD_WRITE | | 
|  | 2234 | FW_CMD_EXEC); | 
|  | 2235 | cmd.alloc_to_len16 = cpu_to_be32(FW_EQ_ETH_CMD_ALLOC | | 
|  | 2236 | FW_EQ_ETH_CMD_EQSTART | | 
|  | 2237 | FW_LEN16(cmd)); | 
|  | 2238 | cmd.viid_pkd = cpu_to_be32(FW_EQ_ETH_CMD_VIID(pi->viid)); | 
|  | 2239 | cmd.fetchszm_to_iqid = | 
|  | 2240 | cpu_to_be32(FW_EQ_ETH_CMD_HOSTFCMODE(SGE_HOSTFCMODE_STPG) | | 
|  | 2241 | FW_EQ_ETH_CMD_PCIECHN(pi->port_id) | | 
|  | 2242 | FW_EQ_ETH_CMD_IQID(iqid)); | 
|  | 2243 | cmd.dcaen_to_eqsize = | 
|  | 2244 | cpu_to_be32(FW_EQ_ETH_CMD_FBMIN(SGE_FETCHBURSTMIN_64B) | | 
|  | 2245 | FW_EQ_ETH_CMD_FBMAX(SGE_FETCHBURSTMAX_512B) | | 
|  | 2246 | FW_EQ_ETH_CMD_CIDXFTHRESH(SGE_CIDXFLUSHTHRESH_32) | | 
|  | 2247 | FW_EQ_ETH_CMD_EQSIZE(nentries)); | 
|  | 2248 | cmd.eqaddr = cpu_to_be64(txq->q.phys_addr); | 
|  | 2249 |  | 
|  | 2250 | /* | 
|  | 2251 | * Issue the firmware Egress Queue Command and extract the results if | 
|  | 2252 | * it completes successfully. | 
|  | 2253 | */ | 
|  | 2254 | ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &rpl); | 
|  | 2255 | if (ret) { | 
|  | 2256 | /* | 
|  | 2257 | * The firmware Egress Queue Command failed for some reason. | 
|  | 2258 | * Free up our partial allocation state and return the error. | 
|  | 2259 | */ | 
|  | 2260 | kfree(txq->q.sdesc); | 
|  | 2261 | txq->q.sdesc = NULL; | 
|  | 2262 | dma_free_coherent(adapter->pdev_dev, | 
|  | 2263 | nentries * sizeof(struct tx_desc), | 
|  | 2264 | txq->q.desc, txq->q.phys_addr); | 
|  | 2265 | txq->q.desc = NULL; | 
|  | 2266 | return ret; | 
|  | 2267 | } | 
|  | 2268 |  | 
|  | 2269 | txq->q.in_use = 0; | 
|  | 2270 | txq->q.cidx = 0; | 
|  | 2271 | txq->q.pidx = 0; | 
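|  |  | /* the Status Page immediately follows the last TX Descriptor */ | 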
|  | 2272 | txq->q.stat = (void *)&txq->q.desc[txq->q.size]; | 
|  | 2273 | txq->q.cntxt_id = FW_EQ_ETH_CMD_EQID_GET(be32_to_cpu(rpl.eqid_pkd)); | 
|  | 2274 | txq->q.abs_id = | 
|  | 2275 | FW_EQ_ETH_CMD_PHYSEQID_GET(be32_to_cpu(rpl.physeqid_pkd)); | 
|  | 2276 | txq->txq = devq; | 
|  | 2277 | txq->tso = 0; | 
|  | 2278 | txq->tx_cso = 0; | 
|  | 2279 | txq->vlan_ins = 0; | 
|  | 2280 | txq->q.stops = 0; | 
|  | 2281 | txq->q.restarts = 0; | 
|  | 2282 | txq->mapping_err = 0; | 
|  | 2283 | return 0; | 
|  | 2284 | } | 
|  | 2285 |  | 
|  | 2286 | /* | 
|  | 2287 | * Free the DMA map resources associated with a TX queue. | 
|  | 2288 | */ | 
|  | 2289 | static void free_txq(struct adapter *adapter, struct sge_txq *tq) | 
|  | 2290 | { | 
|  | 2291 | dma_free_coherent(adapter->pdev_dev, | 
|  | 2292 | tq->size * sizeof(*tq->desc) + STAT_LEN, | 
|  | 2293 | tq->desc, tq->phys_addr); | 
|  | 2294 | tq->cntxt_id = 0; | 
|  | 2295 | tq->sdesc = NULL; | 
|  | 2296 | tq->desc = NULL; | 
|  | 2297 | } | 
|  | 2298 |  | 
|  | 2299 | /* | 
|  | 2300 | * Free the resources associated with a response queue (possibly including a | 
|  | 2301 | * free list). | 
|  | 2302 | */ | 
|  | 2303 | static void free_rspq_fl(struct adapter *adapter, struct sge_rspq *rspq, | 
|  | 2304 | struct sge_fl *fl) | 
|  | 2305 | { | 
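|  |  | /* | 
|  |  | * A Free List ID of 0xffff indicates that no Free List is attached | 
|  |  | * to this Ingress Queue. | 
|  |  | */ | 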
|  | 2306 | unsigned int flid = fl ? fl->cntxt_id : 0xffff; | 
|  | 2307 |  | 
|  | 2308 | t4vf_iq_free(adapter, FW_IQ_TYPE_FL_INT_CAP, | 
|  | 2309 | rspq->cntxt_id, flid, 0xffff); | 
|  | 2310 | dma_free_coherent(adapter->pdev_dev, (rspq->size + 1) * rspq->iqe_len, | 
|  | 2311 | rspq->desc, rspq->phys_addr); | 
|  | 2312 | netif_napi_del(&rspq->napi); | 
|  | 2313 | rspq->netdev = NULL; | 
|  | 2314 | rspq->cntxt_id = 0; | 
|  | 2315 | rspq->abs_id = 0; | 
|  | 2316 | rspq->desc = NULL; | 
|  | 2317 |  | 
|  | 2318 | if (fl) { | 
|  | 2319 | free_rx_bufs(adapter, fl, fl->avail); | 
|  | 2320 | dma_free_coherent(adapter->pdev_dev, | 
|  | 2321 | fl->size * sizeof(*fl->desc) + STAT_LEN, | 
|  | 2322 | fl->desc, fl->addr); | 
|  | 2323 | kfree(fl->sdesc); | 
|  | 2324 | fl->sdesc = NULL; | 
|  | 2325 | fl->cntxt_id = 0; | 
|  | 2326 | fl->desc = NULL; | 
|  | 2327 | } | 
|  | 2328 | } | 
|  | 2329 |  | 
|  | 2330 | /** | 
|  | 2331 | *	t4vf_free_sge_resources - free SGE resources | 
|  | 2332 | *	@adapter: the adapter | 
|  | 2333 | * | 
|  | 2334 | *	Frees resources used by the SGE queue sets. | 
|  | 2335 | */ | 
|  | 2336 | void t4vf_free_sge_resources(struct adapter *adapter) | 
|  | 2337 | { | 
|  | 2338 | struct sge *s = &adapter->sge; | 
|  | 2339 | struct sge_eth_rxq *rxq = s->ethrxq; | 
|  | 2340 | struct sge_eth_txq *txq = s->ethtxq; | 
|  | 2341 | struct sge_rspq *evtq = &s->fw_evtq; | 
|  | 2342 | struct sge_rspq *intrq = &s->intrq; | 
|  | 2343 | int qs; | 
|  | 2344 |  | 
|  | 2345 | for (qs = 0; qs < adapter->sge.ethqsets; qs++) { | 
|  | 2346 | if (rxq->rspq.desc) | 
|  | 2347 | free_rspq_fl(adapter, &rxq->rspq, &rxq->fl); | 
|  | 2348 | if (txq->q.desc) { | 
|  | 2349 | t4vf_eth_eq_free(adapter, txq->q.cntxt_id); | 
|  | 2350 | free_tx_desc(adapter, &txq->q, txq->q.in_use, true); | 
|  | 2351 | kfree(txq->q.sdesc); | 
|  | 2352 | free_txq(adapter, &txq->q); | 
|  | 2353 | } | 
|  | 2354 | } | 
|  | 2355 | if (evtq->desc) | 
|  | 2356 | free_rspq_fl(adapter, evtq, NULL); | 
|  | 2357 | if (intrq->desc) | 
|  | 2358 | free_rspq_fl(adapter, intrq, NULL); | 
|  | 2359 | } | 
|  | 2360 |  | 
|  | 2361 | /** | 
|  | 2362 | *	t4vf_sge_start - enable SGE operation | 
|  | 2363 | *	@adapter: the adapter | 
|  | 2364 | * | 
|  | 2365 | *	Start tasklets and timers associated with the DMA engine. | 
|  | 2366 | */ | 
|  | 2367 | void t4vf_sge_start(struct adapter *adapter) | 
|  | 2368 | { | 
|  | 2369 | adapter->sge.ethtxq_rover = 0; | 
|  | 2370 | mod_timer(&adapter->sge.rx_timer, jiffies + RX_QCHECK_PERIOD); | 
|  | 2371 | mod_timer(&adapter->sge.tx_timer, jiffies + TX_QCHECK_PERIOD); | 
|  | 2372 | } | 
|  | 2373 |  | 
|  | 2374 | /** | 
|  | 2375 | *	t4vf_sge_stop - disable SGE operation | 
|  | 2376 | *	@adapter: the adapter | 
|  | 2377 | * | 
|  | 2378 | *	Stop tasklets and timers associated with the DMA engine.  Note that | 
|  | 2379 | *	this is effective only if measures have been taken to disable any HW | 
|  | 2380 | *	events that may restart them. | 
|  | 2381 | */ | 
|  | 2382 | void t4vf_sge_stop(struct adapter *adapter) | 
|  | 2383 | { | 
|  | 2384 | struct sge *s = &adapter->sge; | 
|  | 2385 |  | 
|  | 2386 | if (s->rx_timer.function) | 
|  | 2387 | del_timer_sync(&s->rx_timer); | 
|  | 2388 | if (s->tx_timer.function) | 
|  | 2389 | del_timer_sync(&s->tx_timer); | 
|  | 2390 | } | 
|  | 2391 |  | 
|  | 2392 | /** | 
|  | 2393 | *	t4vf_sge_init - initialize SGE | 
|  | 2394 | *	@adapter: the adapter | 
|  | 2395 | * | 
|  | 2396 | *	Performs SGE initialization needed every time after a chip reset. | 
|  | 2397 | *	We do not initialize any of the queue sets here; instead, the driver | 
|  | 2398 | *	top-level must request those individually.  We also do not enable DMA | 
|  | 2399 | *	here, that should be done after the queues have been set up. | 
|  | 2400 | */ | 
|  | 2401 | int t4vf_sge_init(struct adapter *adapter) | 
|  | 2402 | { | 
|  | 2403 | struct sge_params *sge_params = &adapter->params.sge; | 
|  | 2404 | u32 fl0 = sge_params->sge_fl_buffer_size[0]; | 
|  | 2405 | u32 fl1 = sge_params->sge_fl_buffer_size[1]; | 
|  | 2406 | struct sge *s = &adapter->sge; | 
|  | 2407 |  | 
|  | 2408 | /* | 
|  | 2409 | * Start by vetting the basic SGE parameters which have been set up by | 
|  | 2410 | * the Physical Function Driver.  Ideally we should be able to deal | 
|  | 2411 | * with _any_ configuration.  Practice is different ... | 
|  | 2412 | */ | 
|  | 2413 | if (fl0 != PAGE_SIZE || (fl1 != 0 && fl1 <= fl0)) { | 
|  | 2414 | dev_err(adapter->pdev_dev, "bad SGE FL buffer sizes [%d, %d]\n", | 
|  | 2415 | fl0, fl1); | 
|  | 2416 | return -EINVAL; | 
|  | 2417 | } | 
|  | 2418 | if ((sge_params->sge_control & RXPKTCPLMODE) == 0) { | 
|  | 2419 | dev_err(adapter->pdev_dev, "bad SGE CPL MODE\n"); | 
|  | 2420 | return -EINVAL; | 
|  | 2421 | } | 
|  | 2422 |  | 
|  | 2423 | /* | 
|  | 2424 | * Now translate the adapter parameters into our internal forms. | 
|  | 2425 | */ | 
|  | 2426 | if (fl1) | 
|  | 2427 | FL_PG_ORDER = ilog2(fl1) - PAGE_SHIFT; | 
|  | 2428 | STAT_LEN = ((sge_params->sge_control & EGRSTATUSPAGESIZE) ? 128 : 64); | 
|  | 2429 | PKTSHIFT = PKTSHIFT_GET(sge_params->sge_control); | 
|  | 2430 | FL_ALIGN = 1 << (INGPADBOUNDARY_GET(sge_params->sge_control) + | 
| Casey Leedom | b3003be | 2010-06-29 12:54:12 +0000 | [diff] [blame] | 2431 | SGE_INGPADBOUNDARY_SHIFT); | 
| Casey Leedom | c6e0d91 | 2010-06-25 12:13:28 +0000 | [diff] [blame] | 2432 |  | 
|  | 2433 | /* | 
|  | 2434 | * Set up tasklet timers. | 
|  | 2435 | */ | 
|  | 2436 | setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adapter); | 
|  | 2437 | setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adapter); | 
|  | 2438 |  | 
|  | 2439 | /* | 
|  | 2440 | * Initialize Forwarded Interrupt Queue lock. | 
|  | 2441 | */ | 
|  | 2442 | spin_lock_init(&s->intrq_lock); | 
|  | 2443 |  | 
|  | 2444 | return 0; | 
|  | 2445 | } |