/*
 * Copyright 2012 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/kernel.h>      /* printk() */
#include <linux/slab.h>        /* kmalloc() */
#include <linux/errno.h>       /* error codes */
#include <linux/types.h>       /* size_t */
#include <linux/interrupt.h>
#include <linux/in.h>
#include <linux/irq.h>
#include <linux/netdevice.h>   /* struct device, and other headers */
#include <linux/etherdevice.h> /* eth_type_trans */
#include <linux/skbuff.h>
#include <linux/ioctl.h>
#include <linux/cdev.h>
#include <linux/hugetlb.h>
#include <linux/in6.h>
#include <linux/timer.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/io.h>
#include <linux/ctype.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>

#include <asm/checksum.h>
#include <asm/homecache.h>
#include <gxio/mpipe.h>
#include <arch/sim.h>

/* Default transmit lockup timeout period, in jiffies. */
#define TILE_NET_TIMEOUT (5 * HZ)

/* The maximum number of distinct channels (idesc.channel is 5 bits). */
#define TILE_NET_CHANNELS 32

/* Maximum number of idescs to handle per "poll". */
#define TILE_NET_BATCH 128

/* Maximum number of packets to handle per "poll". */
#define TILE_NET_WEIGHT 64

/* Number of entries in each iqueue. */
#define IQUEUE_ENTRIES 512

/* Number of entries in each equeue. */
#define EQUEUE_ENTRIES 2048

/* Total header bytes per equeue slot. Must be big enough for 2 bytes
 * of NET_IP_ALIGN alignment, plus 14 bytes (?) of L2 header, plus up to
 * 60 bytes of actual TCP header. We round up to align to cache lines.
 */
#define HEADER_BYTES 128

/* Maximum completions per cpu per device (must be a power of two).
 * ISSUE: What is the right number here? If this is too small, then
 * egress might block waiting for free space in a completions array.
 * ISSUE: At the least, allocate these only for initialized echannels.
 */
#define TILE_NET_MAX_COMPS 64

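/* Maximum number of fragments per packet: the skb frags plus the linear data. */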
#define MAX_FRAGS (MAX_SKB_FRAGS + 1)

/* The "kinds" of buffer stacks (small/large/jumbo). */
#define MAX_KINDS 3

/* Size of completions data to allocate.
 * ISSUE: Probably more than needed since we don't use all the channels.
 */
#define COMPS_SIZE (TILE_NET_CHANNELS * sizeof(struct tile_net_comps))

/* Size of NotifRing data to allocate. */
#define NOTIF_RING_SIZE (IQUEUE_ENTRIES * sizeof(gxio_mpipe_idesc_t))

/* Timeout to wake the per-device TX timer after we stop the queue.
 * We don't want the timeout too short (adds overhead, and might end
 * up causing stop/wake/stop/wake cycles) or too long (affects performance).
 * For the 10 Gb NIC, 30 usec means roughly 30+ 1500-byte packets.
 */
#define TX_TIMER_DELAY_USEC 30

/* Timeout to wake the per-cpu egress timer to free completions. */
#define EGRESS_TIMER_DELAY_USEC 1000

MODULE_AUTHOR("Tilera Corporation");
MODULE_LICENSE("GPL");

/* A "packet fragment" (a chunk of memory). */
struct frag {
	void *buf;
	size_t length;
};

/* A single completion. */
struct tile_net_comp {
	/* The "complete_count" when the completion will be complete. */
	s64 when;
	/* The buffer to be freed when the completion is complete. */
	struct sk_buff *skb;
};

/* The completions for a given cpu and echannel. */
struct tile_net_comps {
	/* The completions. */
	struct tile_net_comp comp_queue[TILE_NET_MAX_COMPS];
	/* The number of completions used. */
	unsigned long comp_next;
	/* The number of completions freed. */
	unsigned long comp_last;
};

/* The transmit wake timer for a given cpu and echannel. */
struct tile_net_tx_wake {
	int tx_queue_idx;
	struct hrtimer timer;
	struct net_device *dev;
};

/* Info for a specific cpu. */
struct tile_net_info {
	/* Our cpu. */
	int my_cpu;
	/* A timer for handling egress completions. */
	struct hrtimer egress_timer;
	/* True if "egress_timer" is scheduled. */
	bool egress_timer_scheduled;
	struct info_mpipe {
		/* Packet queue. */
		gxio_mpipe_iqueue_t iqueue;
		/* The NAPI struct. */
		struct napi_struct napi;
		/* Number of buffers (by kind) which must still be provided. */
		unsigned int num_needed_buffers[MAX_KINDS];
		/* instance id. */
		int instance;
		/* True if iqueue is valid. */
		bool has_iqueue;
		/* NAPI flags. */
		bool napi_added;
		bool napi_enabled;
		/* Comps for each egress channel. */
		struct tile_net_comps *comps_for_echannel[TILE_NET_CHANNELS];
		/* Transmit wake timer for each egress channel. */
		struct tile_net_tx_wake tx_wake[TILE_NET_CHANNELS];
	} mpipe[NR_MPIPE_MAX];
};

/* Info for egress on a particular egress channel. */
struct tile_net_egress {
	/* The "equeue". */
	gxio_mpipe_equeue_t *equeue;
	/* The headers for TSO. */
	unsigned char *headers;
};

/* Info for a specific device. */
struct tile_net_priv {
	/* Our network device. */
	struct net_device *dev;
	/* The primary link. */
	gxio_mpipe_link_t link;
	/* The primary channel, if open, else -1. */
	int channel;
	/* The "loopify" egress link, if needed. */
	gxio_mpipe_link_t loopify_link;
	/* The "loopify" egress channel, if open, else -1. */
	int loopify_channel;
	/* The egress channel (channel or loopify_channel). */
	int echannel;
	/* mPIPE instance, 0 or 1. */
	int instance;
};

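/* Per-instance mPIPE state, shared by all devices on that mPIPE shim. */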
static struct mpipe_data {
	/* The ingress irq. */
	int ingress_irq;

	/* The "context" for all devices. */
	gxio_mpipe_context_t context;

	/* Egress info, indexed by "priv->echannel"
	 * (lazily created as needed).
	 */
	struct tile_net_egress
	egress_for_echannel[TILE_NET_CHANNELS];

	/* Devices currently associated with each channel.
	 * NOTE: The array entry can become NULL after ifconfig down, but
	 * we do not free the underlying net_device structures, so it is
	 * safe to use a pointer after reading it from this array.
	 */
	struct net_device
	*tile_net_devs_for_channel[TILE_NET_CHANNELS];

	/* The actual memory allocated for the buffer stacks. */
	void *buffer_stack_vas[MAX_KINDS];

	/* The amount of memory allocated for each buffer stack. */
	size_t buffer_stack_bytes[MAX_KINDS];

	/* The first buffer stack index
	 * (small = +0, large = +1, jumbo = +2).
	 */
	int first_buffer_stack;

	/* The buckets. */
	int first_bucket;
	int num_buckets;

} mpipe_data[NR_MPIPE_MAX] = {
	[0 ... (NR_MPIPE_MAX - 1)] {
		.ingress_irq = -1,
		.first_buffer_stack = -1,
		.first_bucket = -1,
		.num_buckets = 1
	}
};

/* A mutex for "tile_net_devs_for_channel". */
static DEFINE_MUTEX(tile_net_devs_for_channel_mutex);

/* The per-cpu info. */
static DEFINE_PER_CPU(struct tile_net_info, per_cpu_info);

/* The buffer size enums for each buffer stack.
 * See arch/tile/include/gxio/mpipe.h for the set of possible values.
 * We avoid the "10384" size because it can induce "false chaining"
 * on "cut-through" jumbo packets.
 */
static gxio_mpipe_buffer_size_enum_t buffer_size_enums[MAX_KINDS] = {
	GXIO_MPIPE_BUFFER_SIZE_128,
	GXIO_MPIPE_BUFFER_SIZE_1664,
	GXIO_MPIPE_BUFFER_SIZE_16384
};

/* Text value of tile_net.cpus if passed as a module parameter. */
static char *network_cpus_string;

/* The actual cpus in "network_cpus". */
static struct cpumask network_cpus_map;

/* If "loopify=LINK" was specified, this is "LINK". */
static char *loopify_link_name;

/* If "tile_net.custom" was specified, this is non-NULL. */
static char *custom_str;

/* If "tile_net.jumbo=NUM" was specified, this is "NUM". */
static uint jumbo_num;

/* Obtain mpipe instance from struct tile_net_priv given struct net_device. */
static inline int mpipe_instance(struct net_device *dev)
{
	struct tile_net_priv *priv = netdev_priv(dev);
	return priv->instance;
}

/* The "tile_net.cpus" argument specifies the cpus that are dedicated
 * to handle ingress packets.
 *
 * The parameter should be in the form "tile_net.cpus=m-n[,x-y]", where
 * m, n, x, y are integer numbers that represent the cpus that can be
 * neither a dedicated cpu nor a dataplane cpu.
 */
static bool network_cpus_init(void)
{
	char buf[1024];
	int rc;

	if (network_cpus_string == NULL)
		return false;

	rc = cpulist_parse_crop(network_cpus_string, &network_cpus_map);
	if (rc != 0) {
		pr_warn("tile_net.cpus=%s: malformed cpu list\n",
			network_cpus_string);
		return false;
	}

	/* Remove dedicated cpus. */
	cpumask_and(&network_cpus_map, &network_cpus_map, cpu_possible_mask);

	if (cpumask_empty(&network_cpus_map)) {
		pr_warn("Ignoring empty tile_net.cpus='%s'.\n",
			network_cpus_string);
		return false;
	}

	cpulist_scnprintf(buf, sizeof(buf), &network_cpus_map);
	pr_info("Linux network CPUs: %s\n", buf);
	return true;
}

module_param_named(cpus, network_cpus_string, charp, 0444);
MODULE_PARM_DESC(cpus, "cpulist of cores that handle network interrupts");

/* The "tile_net.loopify=LINK" argument causes the named device to
 * actually use "loop0" for ingress, and "loop1" for egress. This
 * allows an app to sit between the actual link and linux, passing
 * (some) packets along to linux, and forwarding (some) packets sent
 * out by linux.
 */
module_param_named(loopify, loopify_link_name, charp, 0444);
MODULE_PARM_DESC(loopify, "name the device to use loop0/1 for ingress/egress");

/* The "tile_net.custom" argument causes us to ignore the "conventional"
 * classifier metadata, in particular, the "l2_offset".
 */
module_param_named(custom, custom_str, charp, 0444);
MODULE_PARM_DESC(custom, "indicates a (heavily) customized classifier");

/* The "tile_net.jumbo" argument causes us to support "jumbo" packets,
 * and to allocate the given number of "jumbo" buffers.
 */
module_param_named(jumbo, jumbo_num, uint, 0444);
MODULE_PARM_DESC(jumbo, "the number of buffers to support jumbo packets");

/* Atomically update a statistics field.
 * Note that on TILE-Gx, this operation is fire-and-forget on the
 * issuing core (single-cycle dispatch) and takes only a few cycles
 * longer than a regular store when the request reaches the home cache.
 * No expensive bus management overhead is required.
 */
static void tile_net_stats_add(unsigned long value, unsigned long *field)
{
	BUILD_BUG_ON(sizeof(atomic_long_t) != sizeof(unsigned long));
	atomic_long_add(value, (atomic_long_t *)field);
}

/* Allocate and push a buffer. */
static bool tile_net_provide_buffer(int instance, int kind)
{
	struct mpipe_data *md = &mpipe_data[instance];
	gxio_mpipe_buffer_size_enum_t bse = buffer_size_enums[kind];
	size_t bs = gxio_mpipe_buffer_size_enum_to_buffer_size(bse);
	const unsigned long buffer_alignment = 128;
	struct sk_buff *skb;
	int len;

	len = sizeof(struct sk_buff **) + buffer_alignment + bs;
	skb = dev_alloc_skb(len);
	if (skb == NULL)
		return false;

	/* Make room for a back-pointer to 'skb' and guarantee alignment. */
	skb_reserve(skb, sizeof(struct sk_buff **));
	skb_reserve(skb, -(long)skb->data & (buffer_alignment - 1));

	/* Save a back-pointer to 'skb'. */
	*(struct sk_buff **)(skb->data - sizeof(struct sk_buff **)) = skb;

	/* Make sure "skb" and the back-pointer have been flushed. */
	wmb();

	gxio_mpipe_push_buffer(&md->context, md->first_buffer_stack + kind,
			       (void *)va_to_tile_io_addr(skb->data));

	return true;
}

/* Convert a raw mpipe buffer to its matching skb pointer. */
static struct sk_buff *mpipe_buf_to_skb(void *va)
{
	/* Acquire the associated "skb". */
	struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
	struct sk_buff *skb = *skb_ptr;

	/* Paranoia. */
	if (skb->data != va) {
		/* Panic here since there's a reasonable chance
		 * that corrupt buffers means generic memory
		 * corruption, with unpredictable system effects.
		 */
		panic("Corrupt linux buffer! va=%p, skb=%p, skb->data=%p",
		      va, skb, skb->data);
	}

	return skb;
}

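/* Pop all buffers from the given buffer stack and free the underlying skbs. */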
static void tile_net_pop_all_buffers(int instance, int stack)
{
	struct mpipe_data *md = &mpipe_data[instance];

	for (;;) {
		tile_io_addr_t addr =
			(tile_io_addr_t)gxio_mpipe_pop_buffer(&md->context,
							      stack);
		if (addr == 0)
			break;
		dev_kfree_skb_irq(mpipe_buf_to_skb(tile_io_addr_to_va(addr)));
	}
}

/* Provide linux buffers to mPIPE. */
static void tile_net_provide_needed_buffers(void)
{
	struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
	int instance, kind;
	for (instance = 0; instance < NR_MPIPE_MAX &&
		     info->mpipe[instance].has_iqueue; instance++) {
		for (kind = 0; kind < MAX_KINDS; kind++) {
			while (info->mpipe[instance].num_needed_buffers[kind]
			       != 0) {
				if (!tile_net_provide_buffer(instance, kind)) {
					pr_notice("Tile %d still needs some buffers\n",
						  info->my_cpu);
					return;
				}
				info->mpipe[instance].
					num_needed_buffers[kind]--;
			}
		}
	}
}

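/* Return true if an ingress packet should be ignored (device down, or not for us). */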
static inline bool filter_packet(struct net_device *dev, void *buf)
{
	/* Filter packets received before we're up. */
	if (dev == NULL || !(dev->flags & IFF_UP))
		return true;

	/* Filter out packets that aren't for us. */
	if (!(dev->flags & IFF_PROMISC) &&
	    !is_multicast_ether_addr(buf) &&
	    compare_ether_addr(dev->dev_addr, buf) != 0)
		return true;

	return false;
}

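/* Hand a received packet's skb up to the stack via NAPI/GRO and update stats. */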
static void tile_net_receive_skb(struct net_device *dev, struct sk_buff *skb,
				 gxio_mpipe_idesc_t *idesc, unsigned long len)
{
	struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
	int instance = mpipe_instance(dev);

	/* Encode the actual packet length. */
	skb_put(skb, len);

	skb->protocol = eth_type_trans(skb, dev);

	/* Acknowledge "good" hardware checksums. */
	if (idesc->cs && idesc->csum_seed_val == 0xFFFF)
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	napi_gro_receive(&info->mpipe[instance].napi, skb);

	/* Update stats. */
	tile_net_stats_add(1, &dev->stats.rx_packets);
	tile_net_stats_add(len, &dev->stats.rx_bytes);

	/* Need a new buffer. */
	if (idesc->size == buffer_size_enums[0])
		info->mpipe[instance].num_needed_buffers[0]++;
	else if (idesc->size == buffer_size_enums[1])
		info->mpipe[instance].num_needed_buffers[1]++;
	else
		info->mpipe[instance].num_needed_buffers[2]++;
}

/* Handle a packet. Return true if "processed", false if "filtered". */
static bool tile_net_handle_packet(int instance, gxio_mpipe_idesc_t *idesc)
{
	struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
	struct mpipe_data *md = &mpipe_data[instance];
	struct net_device *dev = md->tile_net_devs_for_channel[idesc->channel];
	uint8_t l2_offset;
	void *va;
	void *buf;
	unsigned long len;
	bool filter;

	/* Drop packets for which no buffer was available (which can
	 * happen under heavy load), or for which the me/tr/ce flags
	 * are set (which can happen for jumbo cut-through packets,
	 * or with a customized classifier).
	 */
	if (idesc->be || idesc->me || idesc->tr || idesc->ce) {
		if (dev)
			tile_net_stats_add(1, &dev->stats.rx_errors);
		goto drop;
	}

	/* Get the "l2_offset", if allowed. */
	l2_offset = custom_str ? 0 : gxio_mpipe_idesc_get_l2_offset(idesc);

	/* Get the VA (including NET_IP_ALIGN bytes of "headroom"). */
	va = tile_io_addr_to_va((unsigned long)idesc->va);

	/* Get the actual packet start/length. */
	buf = va + l2_offset;
	len = idesc->l2_size - l2_offset;

	/* Point "va" at the raw buffer. */
	va -= NET_IP_ALIGN;

	filter = filter_packet(dev, buf);
	if (filter) {
		if (dev)
			tile_net_stats_add(1, &dev->stats.rx_dropped);
drop:
		gxio_mpipe_iqueue_drop(&info->mpipe[instance].iqueue, idesc);
	} else {
		struct sk_buff *skb = mpipe_buf_to_skb(va);

		/* Skip headroom, and any custom header. */
		skb_reserve(skb, NET_IP_ALIGN + l2_offset);

		tile_net_receive_skb(dev, skb, idesc, len);
	}

	gxio_mpipe_iqueue_consume(&info->mpipe[instance].iqueue, idesc);
	return !filter;
}

/* Handle some packets for the current CPU.
 *
 * This function handles up to TILE_NET_BATCH idescs per call.
 *
 * ISSUE: Since we do not provide new buffers until this function is
 * complete, we must initially provide enough buffers for each network
 * cpu to fill its iqueue and also its batched idescs.
 *
 * ISSUE: The "rotting packet" race condition occurs if a packet
 * arrives after the queue appears to be empty, and before the
 * hypervisor interrupt is re-enabled.
 */
static int tile_net_poll(struct napi_struct *napi, int budget)
{
	struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
	unsigned int work = 0;
	gxio_mpipe_idesc_t *idesc;
	int instance, i, n;
	struct mpipe_data *md;
	struct info_mpipe *info_mpipe =
		container_of(napi, struct info_mpipe, napi);

	instance = info_mpipe->instance;
	while ((n = gxio_mpipe_iqueue_try_peek(
			&info_mpipe->iqueue,
			&idesc)) > 0) {
		for (i = 0; i < n; i++) {
			if (i == TILE_NET_BATCH)
				goto done;
			if (tile_net_handle_packet(instance,
						   idesc + i)) {
				if (++work >= budget)
					goto done;
			}
		}
	}

	/* There are no packets left. */
	napi_complete(&info_mpipe->napi);

	md = &mpipe_data[instance];
	/* Re-enable hypervisor interrupts. */
	gxio_mpipe_enable_notif_ring_interrupt(
		&md->context, info->mpipe[instance].iqueue.ring);

	/* HACK: Avoid the "rotting packet" problem. */
	if (gxio_mpipe_iqueue_try_peek(&info_mpipe->iqueue, &idesc) > 0)
		napi_schedule(&info_mpipe->napi);

	/* ISSUE: Handle completions? */

done:
	tile_net_provide_needed_buffers();

	return work;
}

/* Handle an ingress interrupt from an instance on the current cpu. */
static irqreturn_t tile_net_handle_ingress_irq(int irq, void *id)
{
	struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
	napi_schedule(&info->mpipe[(uint64_t)id].napi);
	return IRQ_HANDLED;
}

/* Free some completions. This must be called with interrupts blocked. */
static int tile_net_free_comps(gxio_mpipe_equeue_t *equeue,
			       struct tile_net_comps *comps,
			       int limit, bool force_update)
{
	int n = 0;
	while (comps->comp_last < comps->comp_next) {
		unsigned int cid = comps->comp_last % TILE_NET_MAX_COMPS;
		struct tile_net_comp *comp = &comps->comp_queue[cid];
		if (!gxio_mpipe_equeue_is_complete(equeue, comp->when,
						   force_update || n == 0))
			break;
		dev_kfree_skb_irq(comp->skb);
		comps->comp_last++;
		if (++n == limit)
			break;
	}
	return n;
}

/* Add a completion. This must be called with interrupts blocked.
 * tile_net_equeue_try_reserve() will have ensured a free completion entry.
 */
static void add_comp(gxio_mpipe_equeue_t *equeue,
		     struct tile_net_comps *comps,
		     uint64_t when, struct sk_buff *skb)
{
	int cid = comps->comp_next % TILE_NET_MAX_COMPS;
	comps->comp_queue[cid].when = when;
	comps->comp_queue[cid].skb = skb;
	comps->comp_next++;
}

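/* Arm the per-cpu transmit wake timer for a stopped tx queue. */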
static void tile_net_schedule_tx_wake_timer(struct net_device *dev,
					    int tx_queue_idx)
{
	struct tile_net_info *info = &per_cpu(per_cpu_info, tx_queue_idx);
	struct tile_net_priv *priv = netdev_priv(dev);
	int instance = priv->instance;
	struct tile_net_tx_wake *tx_wake =
		&info->mpipe[instance].tx_wake[priv->echannel];

	hrtimer_start(&tx_wake->timer,
		      ktime_set(0, TX_TIMER_DELAY_USEC * 1000UL),
		      HRTIMER_MODE_REL_PINNED);
}

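/* Transmit wake timer handler: restart the stopped tx subqueue. */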
static enum hrtimer_restart tile_net_handle_tx_wake_timer(struct hrtimer *t)
{
	struct tile_net_tx_wake *tx_wake =
		container_of(t, struct tile_net_tx_wake, timer);
	netif_wake_subqueue(tx_wake->dev, tx_wake->tx_queue_idx);
	return HRTIMER_NORESTART;
}

/* Make sure the egress timer is scheduled. */
static void tile_net_schedule_egress_timer(void)
{
	struct tile_net_info *info = &__get_cpu_var(per_cpu_info);

	if (!info->egress_timer_scheduled) {
		hrtimer_start(&info->egress_timer,
			      ktime_set(0, EGRESS_TIMER_DELAY_USEC * 1000UL),
			      HRTIMER_MODE_REL_PINNED);
		info->egress_timer_scheduled = true;
	}
}

/* The "function" for "info->egress_timer".
 *
 * This timer will reschedule itself as long as there are any pending
 * completions expected for this tile.
 */
static enum hrtimer_restart tile_net_handle_egress_timer(struct hrtimer *t)
{
	struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
	unsigned long irqflags;
	bool pending = false;
	int i, instance;

	local_irq_save(irqflags);

	/* The timer is no longer scheduled. */
	info->egress_timer_scheduled = false;

	/* Free all possible comps for this tile. */
	for (instance = 0; instance < NR_MPIPE_MAX &&
		     info->mpipe[instance].has_iqueue; instance++) {
		for (i = 0; i < TILE_NET_CHANNELS; i++) {
			struct tile_net_egress *egress =
				&mpipe_data[instance].egress_for_echannel[i];
			struct tile_net_comps *comps =
				info->mpipe[instance].comps_for_echannel[i];
			if (!egress || comps->comp_last >= comps->comp_next)
				continue;
			tile_net_free_comps(egress->equeue, comps, -1, true);
			pending = pending ||
				(comps->comp_last < comps->comp_next);
		}
	}

	/* Reschedule timer if needed. */
	if (pending)
		tile_net_schedule_egress_timer();

	local_irq_restore(irqflags);

	return HRTIMER_NORESTART;
}

/* Helper functions for "tile_net_update()". */
static void enable_ingress_irq(void *irq)
{
	enable_percpu_irq((long)irq, 0);
}

static void disable_ingress_irq(void *irq)
{
	disable_percpu_irq((long)irq);
}

/* Helper function for tile_net_open() and tile_net_stop().
 * Always called under tile_net_devs_for_channel_mutex.
 */
static int tile_net_update(struct net_device *dev)
{
	static gxio_mpipe_rules_t rules;  /* too big to fit on the stack */
	bool saw_channel = false;
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];
	int channel;
	int rc;
	int cpu;

	saw_channel = false;
	gxio_mpipe_rules_init(&rules, &md->context);

	for (channel = 0; channel < TILE_NET_CHANNELS; channel++) {
		if (md->tile_net_devs_for_channel[channel] == NULL)
			continue;
		if (!saw_channel) {
			saw_channel = true;
			gxio_mpipe_rules_begin(&rules, md->first_bucket,
					       md->num_buckets, NULL);
			gxio_mpipe_rules_set_headroom(&rules, NET_IP_ALIGN);
		}
		gxio_mpipe_rules_add_channel(&rules, channel);
	}

	/* NOTE: This can fail if there is no classifier.
	 * ISSUE: Can anything else cause it to fail?
	 */
	rc = gxio_mpipe_rules_commit(&rules);
	if (rc != 0) {
		netdev_warn(dev, "gxio_mpipe_rules_commit: mpipe[%d] %d\n",
			    instance, rc);
		return -EIO;
	}

	/* Update all cpus, sequentially (to protect "netif_napi_add()").
	 * We use on_each_cpu to handle the IPI mask or unmask.
	 */
	if (!saw_channel)
		on_each_cpu(disable_ingress_irq,
			    (void *)(long)(md->ingress_irq), 1);
	for_each_online_cpu(cpu) {
		struct tile_net_info *info = &per_cpu(per_cpu_info, cpu);

		if (!info->mpipe[instance].has_iqueue)
			continue;
		if (saw_channel) {
			if (!info->mpipe[instance].napi_added) {
				netif_napi_add(dev, &info->mpipe[instance].napi,
					       tile_net_poll, TILE_NET_WEIGHT);
				info->mpipe[instance].napi_added = true;
			}
			if (!info->mpipe[instance].napi_enabled) {
				napi_enable(&info->mpipe[instance].napi);
				info->mpipe[instance].napi_enabled = true;
			}
		} else {
			if (info->mpipe[instance].napi_enabled) {
				napi_disable(&info->mpipe[instance].napi);
				info->mpipe[instance].napi_enabled = false;
			}
			/* FIXME: Drain the iqueue. */
		}
	}
	if (saw_channel)
		on_each_cpu(enable_ingress_irq,
			    (void *)(long)(md->ingress_irq), 1);

	/* HACK: Allow packets to flow in the simulator. */
	if (saw_channel)
		sim_enable_mpipe_links(instance, -1);

	return 0;
}

/* Initialize a buffer stack. */
static int create_buffer_stack(struct net_device *dev,
			       int kind, size_t num_buffers)
{
	pte_t hash_pte = pte_set_home((pte_t) { 0 }, PAGE_HOME_HASH);
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];
	size_t needed = gxio_mpipe_calc_buffer_stack_bytes(num_buffers);
	int stack_idx = md->first_buffer_stack + kind;
	void *va;
	int i, rc;

	/* Round up to 64KB and then use alloc_pages() so we get the
	 * required 64KB alignment.
	 */
	md->buffer_stack_bytes[kind] =
		ALIGN(needed, 64 * 1024);

	va = alloc_pages_exact(md->buffer_stack_bytes[kind], GFP_KERNEL);
	if (va == NULL) {
		netdev_err(dev,
			   "Could not alloc %zd bytes for buffer stack %d\n",
			   md->buffer_stack_bytes[kind], kind);
		return -ENOMEM;
	}

	/* Initialize the buffer stack. */
	rc = gxio_mpipe_init_buffer_stack(&md->context, stack_idx,
					  buffer_size_enums[kind], va,
					  md->buffer_stack_bytes[kind], 0);
	if (rc != 0) {
		netdev_err(dev, "gxio_mpipe_init_buffer_stack: mpipe[%d] %d\n",
			   instance, rc);
		free_pages_exact(va, md->buffer_stack_bytes[kind]);
		return rc;
	}

	md->buffer_stack_vas[kind] = va;

	rc = gxio_mpipe_register_client_memory(&md->context, stack_idx,
					       hash_pte, 0);
	if (rc != 0) {
		netdev_err(dev,
			   "gxio_mpipe_register_client_memory: mpipe[%d] %d\n",
			   instance, rc);
		return rc;
	}

	/* Provide initial buffers. */
	for (i = 0; i < num_buffers; i++) {
		if (!tile_net_provide_buffer(instance, kind)) {
			netdev_err(dev, "Cannot allocate initial sk_bufs!\n");
			return -ENOMEM;
		}
	}

	return 0;
}

/* Allocate and initialize mpipe buffer stacks, and register them in
 * the mPIPE TLBs, for small, large, and (possibly) jumbo packet sizes.
 * This routine supports tile_net_init_mpipe(), below.
 */
static int init_buffer_stacks(struct net_device *dev,
			      int network_cpus_count)
{
	int num_kinds = MAX_KINDS - (jumbo_num == 0);
	size_t num_buffers;
	int rc;
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];

	/* Allocate the buffer stacks. */
	rc = gxio_mpipe_alloc_buffer_stacks(&md->context, num_kinds, 0, 0);
	if (rc < 0) {
		netdev_err(dev,
			   "gxio_mpipe_alloc_buffer_stacks: mpipe[%d] %d\n",
			   instance, rc);
		return rc;
	}
	md->first_buffer_stack = rc;

	/* Enough small/large buffers to (normally) avoid buffer errors. */
	num_buffers =
		network_cpus_count * (IQUEUE_ENTRIES + TILE_NET_BATCH);

	/* Allocate the small memory stack. */
	if (rc >= 0)
		rc = create_buffer_stack(dev, 0, num_buffers);

	/* Allocate the large buffer stack. */
	if (rc >= 0)
		rc = create_buffer_stack(dev, 1, num_buffers);

	/* Allocate the jumbo buffer stack if needed. */
	if (rc >= 0 && jumbo_num != 0)
		rc = create_buffer_stack(dev, 2, jumbo_num);

	return rc;
}

/* Allocate per-cpu resources (memory for completions and idescs).
 * This routine supports tile_net_init_mpipe(), below.
 */
static int alloc_percpu_mpipe_resources(struct net_device *dev,
					int cpu, int ring)
{
	struct tile_net_info *info = &per_cpu(per_cpu_info, cpu);
	int order, i, rc;
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];
	struct page *page;
	void *addr;

	/* Allocate the "comps". */
	order = get_order(COMPS_SIZE);
	page = homecache_alloc_pages(GFP_KERNEL, order, cpu);
	if (page == NULL) {
		netdev_err(dev, "Failed to alloc %zd bytes comps memory\n",
			   COMPS_SIZE);
		return -ENOMEM;
	}
	addr = pfn_to_kaddr(page_to_pfn(page));
	memset(addr, 0, COMPS_SIZE);
	for (i = 0; i < TILE_NET_CHANNELS; i++)
		info->mpipe[instance].comps_for_echannel[i] =
			addr + i * sizeof(struct tile_net_comps);

	/* If this is a network cpu, create an iqueue. */
	if (cpu_isset(cpu, network_cpus_map)) {
		order = get_order(NOTIF_RING_SIZE);
		page = homecache_alloc_pages(GFP_KERNEL, order, cpu);
		if (page == NULL) {
			netdev_err(dev,
				   "Failed to alloc %zd bytes iqueue memory\n",
				   NOTIF_RING_SIZE);
			return -ENOMEM;
		}
		addr = pfn_to_kaddr(page_to_pfn(page));
		rc = gxio_mpipe_iqueue_init(&info->mpipe[instance].iqueue,
					    &md->context, ring++, addr,
					    NOTIF_RING_SIZE, 0);
		if (rc < 0) {
			netdev_err(dev,
				   "gxio_mpipe_iqueue_init failed: %d\n", rc);
			return rc;
		}
		info->mpipe[instance].has_iqueue = true;
	}

	return ring;
}

/* Initialize NotifGroup and buckets.
 * This routine supports tile_net_init_mpipe(), below.
 */
static int init_notif_group_and_buckets(struct net_device *dev,
					int ring, int network_cpus_count)
{
	int group, rc;
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];

	/* Allocate one NotifGroup. */
	rc = gxio_mpipe_alloc_notif_groups(&md->context, 1, 0, 0);
	if (rc < 0) {
		netdev_err(dev, "gxio_mpipe_alloc_notif_groups: mpipe[%d] %d\n",
			   instance, rc);
		return rc;
	}
	group = rc;

	/* Initialize global num_buckets value. */
	if (network_cpus_count > 4)
		md->num_buckets = 256;
	else if (network_cpus_count > 1)
		md->num_buckets = 16;

	/* Allocate some buckets, and set global first_bucket value. */
	rc = gxio_mpipe_alloc_buckets(&md->context, md->num_buckets, 0, 0);
	if (rc < 0) {
		netdev_err(dev, "gxio_mpipe_alloc_buckets: mpipe[%d] %d\n",
			   instance, rc);
		return rc;
	}
	md->first_bucket = rc;

	/* Init group and buckets. */
	rc = gxio_mpipe_init_notif_group_and_buckets(
		&md->context, group, ring, network_cpus_count,
		md->first_bucket, md->num_buckets,
		GXIO_MPIPE_BUCKET_STICKY_FLOW_LOCALITY);
	if (rc != 0) {
		netdev_err(dev, "gxio_mpipe_init_notif_group_and_buckets: mpipe[%d] %d\n",
			   instance, rc);
		return rc;
	}

	return 0;
}

/* Create an irq and register it, then activate the irq and request
 * interrupts on all cores. Note that "ingress_irq" being initialized
 * is how we know not to call tile_net_init_mpipe() again.
 * This routine supports tile_net_init_mpipe(), below.
 */
static int tile_net_setup_interrupts(struct net_device *dev)
{
	int cpu, rc, irq;
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];

	irq = md->ingress_irq;
	if (irq < 0) {
		irq = create_irq();
		if (irq < 0) {
			netdev_err(dev,
				   "create_irq failed: mpipe[%d] %d\n",
				   instance, irq);
			return irq;
		}
		tile_irq_activate(irq, TILE_IRQ_PERCPU);

		rc = request_irq(irq, tile_net_handle_ingress_irq,
				 0, "tile_net", (void *)((uint64_t)instance));

		if (rc != 0) {
			netdev_err(dev, "request_irq failed: mpipe[%d] %d\n",
				   instance, rc);
			destroy_irq(irq);
			return rc;
		}
		md->ingress_irq = irq;
	}

	for_each_online_cpu(cpu) {
		struct tile_net_info *info = &per_cpu(per_cpu_info, cpu);
		if (info->mpipe[instance].has_iqueue) {
			gxio_mpipe_request_notif_ring_interrupt(&md->context,
				cpu_x(cpu), cpu_y(cpu), KERNEL_PL, irq,
				info->mpipe[instance].iqueue.ring);
		}
	}

	return 0;
}

/* Undo any state set up partially by a failed call to tile_net_init_mpipe. */
static void tile_net_init_mpipe_fail(int instance)
{
	int kind, cpu;
	struct mpipe_data *md = &mpipe_data[instance];

	/* Do cleanups that require the mpipe context first. */
	for (kind = 0; kind < MAX_KINDS; kind++) {
		if (md->buffer_stack_vas[kind] != NULL) {
			tile_net_pop_all_buffers(instance,
						 md->first_buffer_stack +
						 kind);
		}
	}

	/* Destroy mpipe context so the hardware no longer owns any memory. */
	gxio_mpipe_destroy(&md->context);

	for_each_online_cpu(cpu) {
		struct tile_net_info *info = &per_cpu(per_cpu_info, cpu);
		free_pages(
			(unsigned long)(
				info->mpipe[instance].comps_for_echannel[0]),
			get_order(COMPS_SIZE));
		info->mpipe[instance].comps_for_echannel[0] = NULL;
		free_pages((unsigned long)(info->mpipe[instance].iqueue.idescs),
			   get_order(NOTIF_RING_SIZE));
		info->mpipe[instance].iqueue.idescs = NULL;
	}

	for (kind = 0; kind < MAX_KINDS; kind++) {
		if (md->buffer_stack_vas[kind] != NULL) {
			free_pages_exact(md->buffer_stack_vas[kind],
					 md->buffer_stack_bytes[kind]);
			md->buffer_stack_vas[kind] = NULL;
		}
	}

	md->first_buffer_stack = -1;
	md->first_bucket = -1;
}

/* The first time any tilegx network device is opened, we initialize
 * the global mpipe state. If this step fails, we fail to open the
 * device, but if it succeeds, we never need to do it again, and since
 * tile_net can't be unloaded, we never undo it.
 *
 * Note that some resources in this path (buffer stack indices,
 * bindings from init_buffer_stack, etc.) are hypervisor resources
 * that are freed implicitly by gxio_mpipe_destroy().
 */
static int tile_net_init_mpipe(struct net_device *dev)
{
	int rc;
	int cpu;
	int first_ring, ring;
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];
	int network_cpus_count = cpus_weight(network_cpus_map);

	if (!hash_default) {
		netdev_err(dev, "Networking requires hash_default!\n");
		return -EIO;
	}

	rc = gxio_mpipe_init(&md->context, instance);
	if (rc != 0) {
		netdev_err(dev, "gxio_mpipe_init: mpipe[%d] %d\n",
			   instance, rc);
		return -EIO;
	}

	/* Set up the buffer stacks. */
	rc = init_buffer_stacks(dev, network_cpus_count);
	if (rc != 0)
		goto fail;

	/* Allocate one NotifRing for each network cpu. */
	rc = gxio_mpipe_alloc_notif_rings(&md->context,
					  network_cpus_count, 0, 0);
	if (rc < 0) {
		netdev_err(dev, "gxio_mpipe_alloc_notif_rings failed %d\n",
			   rc);
		goto fail;
	}

	/* Init NotifRings per-cpu. */
	first_ring = rc;
	ring = first_ring;
	for_each_online_cpu(cpu) {
		rc = alloc_percpu_mpipe_resources(dev, cpu, ring);
		if (rc < 0)
			goto fail;
		ring = rc;
	}

	/* Initialize NotifGroup and buckets. */
	rc = init_notif_group_and_buckets(dev, first_ring, network_cpus_count);
	if (rc != 0)
		goto fail;

	/* Create and enable interrupts. */
	rc = tile_net_setup_interrupts(dev);
	if (rc != 0)
		goto fail;

	return 0;

fail:
	tile_net_init_mpipe_fail(instance);
	return rc;
}

/* Create persistent egress info for a given egress channel.
 * Note that this may be shared between, say, "gbe0" and "xgbe0".
 * ISSUE: Defer header allocation until TSO is actually needed?
 */
static int tile_net_init_egress(struct net_device *dev, int echannel)
{
	static int ering = -1;
	struct page *headers_page, *edescs_page, *equeue_page;
	gxio_mpipe_edesc_t *edescs;
	gxio_mpipe_equeue_t *equeue;
	unsigned char *headers;
	int headers_order, edescs_order, equeue_order;
	size_t edescs_size;
	int rc = -ENOMEM;
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];

	/* Only initialize once. */
	if (md->egress_for_echannel[echannel].equeue != NULL)
		return 0;

	/* Allocate memory for the "headers". */
	headers_order = get_order(EQUEUE_ENTRIES * HEADER_BYTES);
	headers_page = alloc_pages(GFP_KERNEL, headers_order);
	if (headers_page == NULL) {
		netdev_warn(dev,
			    "Could not alloc %zd bytes for TSO headers.\n",
			    PAGE_SIZE << headers_order);
		goto fail;
	}
	headers = pfn_to_kaddr(page_to_pfn(headers_page));

	/* Allocate memory for the "edescs". */
	edescs_size = EQUEUE_ENTRIES * sizeof(*edescs);
	edescs_order = get_order(edescs_size);
	edescs_page = alloc_pages(GFP_KERNEL, edescs_order);
	if (edescs_page == NULL) {
		netdev_warn(dev,
			    "Could not alloc %zd bytes for eDMA ring.\n",
			    edescs_size);
		goto fail_headers;
	}
	edescs = pfn_to_kaddr(page_to_pfn(edescs_page));

	/* Allocate memory for the "equeue". */
	equeue_order = get_order(sizeof(*equeue));
	equeue_page = alloc_pages(GFP_KERNEL, equeue_order);
	if (equeue_page == NULL) {
		netdev_warn(dev,
			    "Could not alloc %zd bytes for equeue info.\n",
			    PAGE_SIZE << equeue_order);
		goto fail_edescs;
	}
	equeue = pfn_to_kaddr(page_to_pfn(equeue_page));

	/* Allocate an edma ring (using a one entry "free list"). */
	if (ering < 0) {
		rc = gxio_mpipe_alloc_edma_rings(&md->context, 1, 0, 0);
		if (rc < 0) {
			netdev_warn(dev, "gxio_mpipe_alloc_edma_rings: mpipe[%d] %d\n",
				    instance, rc);
			goto fail_equeue;
		}
		ering = rc;
	}

	/* Initialize the equeue. */
	rc = gxio_mpipe_equeue_init(equeue, &md->context, ering, echannel,
				    edescs, edescs_size, 0);
	if (rc != 0) {
		netdev_err(dev, "gxio_mpipe_equeue_init: mpipe[%d] %d\n",
			   instance, rc);
		goto fail_equeue;
	}

	/* Don't reuse the ering later. */
	ering = -1;

	if (jumbo_num != 0) {
		/* Make sure "jumbo" packets can be egressed safely. */
		if (gxio_mpipe_equeue_set_snf_size(equeue, 10368) < 0) {
			/* ISSUE: There is no "gxio_mpipe_equeue_destroy()". */
			netdev_warn(dev, "Jumbo packets may not be egressed properly on channel %d\n",
				    echannel);
		}
	}

	/* Done. */
	md->egress_for_echannel[echannel].equeue = equeue;
	md->egress_for_echannel[echannel].headers = headers;
	return 0;

fail_equeue:
	__free_pages(equeue_page, equeue_order);

fail_edescs:
	__free_pages(edescs_page, edescs_order);

fail_headers:
	__free_pages(headers_page, headers_order);

fail:
	return rc;
}

/* Return channel number for a newly-opened link. */
static int tile_net_link_open(struct net_device *dev, gxio_mpipe_link_t *link,
			      const char *link_name)
{
	int instance = mpipe_instance(dev);
	struct mpipe_data *md = &mpipe_data[instance];
	int rc = gxio_mpipe_link_open(link, &md->context, link_name, 0);
	if (rc < 0) {
		netdev_err(dev, "Failed to open '%s', mpipe[%d], %d\n",
			   link_name, instance, rc);
		return rc;
	}
	if (jumbo_num != 0) {
		u32 attr = GXIO_MPIPE_LINK_RECEIVE_JUMBO;
		rc = gxio_mpipe_link_set_attr(link, attr, 1);
		if (rc != 0) {
			netdev_err(dev,
				   "Cannot receive jumbo packets on '%s'\n",
				   link_name);
			gxio_mpipe_link_close(link);
			return rc;
		}
	}
	rc = gxio_mpipe_link_channel(link);
	if (rc < 0 || rc >= TILE_NET_CHANNELS) {
		netdev_err(dev, "gxio_mpipe_link_channel bad value: %d\n", rc);
		gxio_mpipe_link_close(link);
		return -EINVAL;
	}
	return rc;
}

1296/* Help the kernel activate the given network interface. */
1297static int tile_net_open(struct net_device *dev)
1298{
1299 struct tile_net_priv *priv = netdev_priv(dev);
Chris Metcalff3286a32013-08-01 11:36:42 -04001300 int cpu, rc, instance;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001301
1302 mutex_lock(&tile_net_devs_for_channel_mutex);
1303
Chris Metcalff3286a32013-08-01 11:36:42 -04001304 /* Get the instance info. */
1305 rc = gxio_mpipe_link_instance(dev->name);
1306 if (rc < 0 || rc >= NR_MPIPE_MAX)
1307 return -EIO;
1308
1309 priv->instance = rc;
1310 instance = rc;
1311 if (!mpipe_data[rc].context.mmio_fast_base) {
1312 /* Do one-time initialization per instance the first time
1313 * any device is opened.
1314 */
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001315 rc = tile_net_init_mpipe(dev);
1316 if (rc != 0)
1317 goto fail;
1318 }
1319
1320 /* Determine if this is the "loopify" device. */
1321 if (unlikely((loopify_link_name != NULL) &&
1322 !strcmp(dev->name, loopify_link_name))) {
1323 rc = tile_net_link_open(dev, &priv->link, "loop0");
1324 if (rc < 0)
1325 goto fail;
1326 priv->channel = rc;
1327 rc = tile_net_link_open(dev, &priv->loopify_link, "loop1");
1328 if (rc < 0)
1329 goto fail;
1330 priv->loopify_channel = rc;
1331 priv->echannel = rc;
1332 } else {
1333 rc = tile_net_link_open(dev, &priv->link, dev->name);
1334 if (rc < 0)
1335 goto fail;
1336 priv->channel = rc;
1337 priv->echannel = rc;
1338 }
1339
1340 /* Initialize egress info (if needed). Once ever, per echannel. */
1341 rc = tile_net_init_egress(dev, priv->echannel);
1342 if (rc != 0)
1343 goto fail;
1344
Chris Metcalff3286a32013-08-01 11:36:42 -04001345 mpipe_data[instance].tile_net_devs_for_channel[priv->channel] = dev;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001346
1347 rc = tile_net_update(dev);
1348 if (rc != 0)
1349 goto fail;
1350
1351 mutex_unlock(&tile_net_devs_for_channel_mutex);
1352
1353 /* Initialize the transmit wake timer for this device for each cpu. */
1354 for_each_online_cpu(cpu) {
1355 struct tile_net_info *info = &per_cpu(per_cpu_info, cpu);
1356 struct tile_net_tx_wake *tx_wake =
Chris Metcalff3286a32013-08-01 11:36:42 -04001357 &info->mpipe[instance].tx_wake[priv->echannel];
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001358
1359 hrtimer_init(&tx_wake->timer, CLOCK_MONOTONIC,
1360 HRTIMER_MODE_REL);
Chris Metcalf9b4c3412012-07-01 14:43:47 -04001361 tx_wake->tx_queue_idx = cpu;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001362 tx_wake->timer.function = tile_net_handle_tx_wake_timer;
1363 tx_wake->dev = dev;
1364 }
1365
1366 for_each_online_cpu(cpu)
1367 netif_start_subqueue(dev, cpu);
1368 netif_carrier_on(dev);
1369 return 0;
1370
1371fail:
1372 if (priv->loopify_channel >= 0) {
1373 if (gxio_mpipe_link_close(&priv->loopify_link) != 0)
1374 netdev_warn(dev, "Failed to close loopify link!\n");
1375 priv->loopify_channel = -1;
1376 }
1377 if (priv->channel >= 0) {
1378 if (gxio_mpipe_link_close(&priv->link) != 0)
1379 netdev_warn(dev, "Failed to close link!\n");
1380 priv->channel = -1;
1381 }
1382 priv->echannel = -1;
Chris Metcalff3286a32013-08-01 11:36:42 -04001383 mpipe_data[instance].tile_net_devs_for_channel[priv->channel] = NULL;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001384 mutex_unlock(&tile_net_devs_for_channel_mutex);
1385
1386 /* Don't return raw gxio error codes to generic Linux. */
1387 return (rc > -512) ? rc : -EIO;
1388}
1389
1390/* Help the kernel deactivate the given network interface. */
1391static int tile_net_stop(struct net_device *dev)
1392{
1393 struct tile_net_priv *priv = netdev_priv(dev);
1394 int cpu;
Chris Metcalff3286a32013-08-01 11:36:42 -04001395 int instance = priv->instance;
1396 struct mpipe_data *md = &mpipe_data[instance];
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001397
1398 for_each_online_cpu(cpu) {
1399 struct tile_net_info *info = &per_cpu(per_cpu_info, cpu);
1400 struct tile_net_tx_wake *tx_wake =
Chris Metcalff3286a32013-08-01 11:36:42 -04001401 &info->mpipe[instance].tx_wake[priv->echannel];
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001402
1403 hrtimer_cancel(&tx_wake->timer);
1404 netif_stop_subqueue(dev, cpu);
1405 }
1406
1407 mutex_lock(&tile_net_devs_for_channel_mutex);
Chris Metcalff3286a32013-08-01 11:36:42 -04001408 md->tile_net_devs_for_channel[priv->channel] = NULL;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001409 (void)tile_net_update(dev);
1410 if (priv->loopify_channel >= 0) {
1411 if (gxio_mpipe_link_close(&priv->loopify_link) != 0)
1412 netdev_warn(dev, "Failed to close loopify link!\n");
1413 priv->loopify_channel = -1;
1414 }
1415 if (priv->channel >= 0) {
1416 if (gxio_mpipe_link_close(&priv->link) != 0)
1417 netdev_warn(dev, "Failed to close link!\n");
1418 priv->channel = -1;
1419 }
1420 priv->echannel = -1;
1421 mutex_unlock(&tile_net_devs_for_channel_mutex);
1422
1423 return 0;
1424}
1425
1426/* Determine the VA for a fragment. */
1427static inline void *tile_net_frag_buf(skb_frag_t *f)
1428{
1429 unsigned long pfn = page_to_pfn(skb_frag_page(f));
1430 return pfn_to_kaddr(pfn) + f->page_offset;
1431}
1432
1433/* Acquire a completion entry and an egress slot, or if we can't,
1434 * stop the queue and schedule the tx_wake timer.
1435 */
1436static s64 tile_net_equeue_try_reserve(struct net_device *dev,
Chris Metcalf9b4c3412012-07-01 14:43:47 -04001437 int tx_queue_idx,
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001438 struct tile_net_comps *comps,
1439 gxio_mpipe_equeue_t *equeue,
1440 int num_edescs)
1441{
1442 /* Try to acquire a completion entry. */
1443 if (comps->comp_next - comps->comp_last < TILE_NET_MAX_COMPS - 1 ||
1444 tile_net_free_comps(equeue, comps, 32, false) != 0) {
1445
1446 /* Try to acquire an egress slot. */
1447 s64 slot = gxio_mpipe_equeue_try_reserve(equeue, num_edescs);
1448 if (slot >= 0)
1449 return slot;
1450
1451 /* Freeing some completions gives the equeue time to drain. */
1452 tile_net_free_comps(equeue, comps, TILE_NET_MAX_COMPS, false);
1453
1454 slot = gxio_mpipe_equeue_try_reserve(equeue, num_edescs);
1455 if (slot >= 0)
1456 return slot;
1457 }
1458
1459 /* Still nothing; give up and stop the queue for a short while. */
Chris Metcalf9b4c3412012-07-01 14:43:47 -04001460 netif_stop_subqueue(dev, tx_queue_idx);
1461 tile_net_schedule_tx_wake_timer(dev, tx_queue_idx);
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001462 return -1;
1463}
1464
1465/* Determine how many edesc's are needed for TSO.
1466 *
1467 * Sometimes, if "sendfile()" requires copying, we will be called with
1468 * "data" containing the header and payload, with "frags" being empty.
1469 * Sometimes, for example when using NFS over TCP, a single segment can
1470 * span 3 fragments. This requires special care.
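 *
 * For example (illustrative numbers only, not from a real trace): with a
 * gso_size of 1400, no payload in the linear area, and frags of
 * 1000 + 1000 + 800 bytes, the first segment needs one edesc for its
 * header plus two for payload (1000 bytes from frag 0, 400 from frag 1),
 * and the second needs one for its header plus two more (600 from
 * frag 1, 800 from frag 2), for six edescs in total.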
1471 */
1472static int tso_count_edescs(struct sk_buff *skb)
1473{
1474 struct skb_shared_info *sh = skb_shinfo(skb);
Chris Metcalf83885462012-07-11 14:08:21 -04001475 unsigned int sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
Chris Metcalf3da3fff2012-10-25 07:25:20 +00001476 unsigned int data_len = skb->len - sh_len;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001477 unsigned int p_len = sh->gso_size;
1478 long f_id = -1; /* id of the current fragment */
Chris Metcalf3da3fff2012-10-25 07:25:20 +00001479 long f_size = skb_headlen(skb) - sh_len; /* current fragment size */
1480 long f_used = 0; /* bytes used from the current fragment */
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001481 long n; /* size of the current piece of payload */
1482 int num_edescs = 0;
1483 int segment;
1484
1485 for (segment = 0; segment < sh->gso_segs; segment++) {
1486
1487 unsigned int p_used = 0;
1488
1489 /* One edesc for header and for each piece of the payload. */
1490 for (num_edescs++; p_used < p_len; num_edescs++) {
1491
1492 /* Advance as needed. */
1493 while (f_used >= f_size) {
1494 f_id++;
Chris Metcalf3da3fff2012-10-25 07:25:20 +00001495 f_size = skb_frag_size(&sh->frags[f_id]);
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001496 f_used = 0;
1497 }
1498
1499 /* Use bytes from the current fragment. */
1500 n = p_len - p_used;
1501 if (n > f_size - f_used)
1502 n = f_size - f_used;
1503 f_used += n;
1504 p_used += n;
1505 }
1506
1507 /* The last segment may be less than gso_size. */
1508 data_len -= p_len;
1509 if (data_len < p_len)
1510 p_len = data_len;
1511 }
1512
1513 return num_edescs;
1514}
1515
Chris Metcalf2c7d04a2013-08-01 11:36:42 -04001516/* Prepare modified copies of the skbuff headers. */
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001517static void tso_headers_prepare(struct sk_buff *skb, unsigned char *headers,
1518 s64 slot)
1519{
1520 struct skb_shared_info *sh = skb_shinfo(skb);
1521 struct iphdr *ih;
Chris Metcalf2c7d04a2013-08-01 11:36:42 -04001522 struct ipv6hdr *ih6;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001523 struct tcphdr *th;
Chris Metcalf83885462012-07-11 14:08:21 -04001524 unsigned int sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
Chris Metcalf3da3fff2012-10-25 07:25:20 +00001525 unsigned int data_len = skb->len - sh_len;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001526 unsigned char *data = skb->data;
Chris Metcalf83885462012-07-11 14:08:21 -04001527 unsigned int ih_off, th_off, p_len;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001528 unsigned int isum_seed, tsum_seed, id, seq;
Chris Metcalf2c7d04a2013-08-01 11:36:42 -04001529 int is_ipv6;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001530 long f_id = -1; /* id of the current fragment */
Chris Metcalf3da3fff2012-10-25 07:25:20 +00001531 long f_size = skb_headlen(skb) - sh_len; /* current fragment size */
1532 long f_used = 0; /* bytes used from the current fragment */
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001533 long n; /* size of the current piece of payload */
1534 int segment;
1535
1536 /* Locate original headers and compute various lengths. */
Chris Metcalf2c7d04a2013-08-01 11:36:42 -04001537 is_ipv6 = skb_is_gso_v6(skb);
1538 if (is_ipv6) {
1539 ih6 = ipv6_hdr(skb);
1540 ih_off = skb_network_offset(skb);
1541 } else {
1542 ih = ip_hdr(skb);
1543 ih_off = skb_network_offset(skb);
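		/* Seed for rebuilding the IP header checksum per segment:
		 * ~ih->check recovers the one's-complement sum of the
		 * original header, so backing out the old tot_len and id
		 * leaves a value that each copied header can fold its own
		 * tot_len and id back into via csum_long() below.
		 */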
1544 isum_seed = ((0xFFFF - ih->check) +
1545 (0xFFFF - ih->tot_len) +
1546 (0xFFFF - ih->id));
1547 id = ntohs(ih->id);
1548 }
1549
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001550 th = tcp_hdr(skb);
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001551 th_off = skb_transport_offset(skb);
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001552 p_len = sh->gso_size;
1553
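	/* Seed for the per-segment TCP checksums: back the original
	 * skb->len out of th->check so that each segment's own length
	 * (sh_len + p_len) can be folded back in when the copied TCP
	 * header is fixed up below.
	 */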
Chris Metcalf3da3fff2012-10-25 07:25:20 +00001554 tsum_seed = th->check + (0xFFFF ^ htons(skb->len));
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001555 seq = ntohl(th->seq);
1556
1557 /* Prepare all the headers. */
1558 for (segment = 0; segment < sh->gso_segs; segment++) {
1559 unsigned char *buf;
1560 unsigned int p_used = 0;
1561
1562 /* Copy to the header memory for this segment. */
1563 buf = headers + (slot % EQUEUE_ENTRIES) * HEADER_BYTES +
1564 NET_IP_ALIGN;
1565 memcpy(buf, data, sh_len);
1566
1567 /* Update copied ip header. */
Chris Metcalf2c7d04a2013-08-01 11:36:42 -04001568 if (is_ipv6) {
1569 ih6 = (struct ipv6hdr *)(buf + ih_off);
1570 ih6->payload_len = htons(sh_len + p_len - ih_off -
1571 sizeof(*ih6));
1572 } else {
1573 ih = (struct iphdr *)(buf + ih_off);
1574 ih->tot_len = htons(sh_len + p_len - ih_off);
1575 ih->id = htons(id);
1576 ih->check = csum_long(isum_seed + ih->tot_len +
1577 ih->id) ^ 0xffff;
1578 }
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001579
1580 /* Update copied tcp header. */
1581 th = (struct tcphdr *)(buf + th_off);
1582 th->seq = htonl(seq);
1583 th->check = csum_long(tsum_seed + htons(sh_len + p_len));
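		/* Only the final segment may keep FIN or PSH from the
		 * original header.
		 */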
1584 if (segment != sh->gso_segs - 1) {
1585 th->fin = 0;
1586 th->psh = 0;
1587 }
1588
1589 /* Skip past the header. */
1590 slot++;
1591
1592 /* Skip past the payload. */
1593 while (p_used < p_len) {
1594
1595 /* Advance as needed. */
1596 while (f_used >= f_size) {
1597 f_id++;
Chris Metcalf3da3fff2012-10-25 07:25:20 +00001598 f_size = skb_frag_size(&sh->frags[f_id]);
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001599 f_used = 0;
1600 }
1601
1602 /* Use bytes from the current fragment. */
1603 n = p_len - p_used;
1604 if (n > f_size - f_used)
1605 n = f_size - f_used;
1606 f_used += n;
1607 p_used += n;
1608
1609 slot++;
1610 }
1611
1612 id++;
1613 seq += p_len;
1614
1615 /* The last segment may be less than gso_size. */
1616 data_len -= p_len;
1617 if (data_len < p_len)
1618 p_len = data_len;
1619 }
1620
1621 /* Flush the headers so they are ready for hardware DMA. */
1622 wmb();
1623}
1624
1625/* Pass all the data to mpipe for egress. */
1626static void tso_egress(struct net_device *dev, gxio_mpipe_equeue_t *equeue,
1627 struct sk_buff *skb, unsigned char *headers, s64 slot)
1628{
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001629 struct skb_shared_info *sh = skb_shinfo(skb);
Chris Metcalff3286a32013-08-01 11:36:42 -04001630 int instance = mpipe_instance(dev);
1631 struct mpipe_data *md = &mpipe_data[instance];
Chris Metcalf83885462012-07-11 14:08:21 -04001632 unsigned int sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
Chris Metcalf3da3fff2012-10-25 07:25:20 +00001633 unsigned int data_len = skb->len - sh_len;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001634 unsigned int p_len = sh->gso_size;
1635 gxio_mpipe_edesc_t edesc_head = { { 0 } };
1636 gxio_mpipe_edesc_t edesc_body = { { 0 } };
1637 long f_id = -1; /* id of the current fragment */
Chris Metcalf3da3fff2012-10-25 07:25:20 +00001638 long f_size = skb_headlen(skb) - sh_len; /* current fragment size */
1639 long f_used = 0; /* bytes used from the current fragment */
1640 void *f_data = skb->data + sh_len;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001641 long n; /* size of the current piece of payload */
1642 unsigned long tx_packets = 0, tx_bytes = 0;
Chris Metcalf83885462012-07-11 14:08:21 -04001643 unsigned int csum_start;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001644 int segment;
1645
1646 /* Prepare to egress the headers: set up header edesc. */
1647 csum_start = skb_checksum_start_offset(skb);
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001648 edesc_head.csum = 1;
1649 edesc_head.csum_start = csum_start;
1650 edesc_head.csum_dest = csum_start + skb->csum_offset;
1651 edesc_head.xfer_size = sh_len;
1652
1653 /* This is only used to specify the TLB. */
Chris Metcalff3286a32013-08-01 11:36:42 -04001654 edesc_head.stack_idx = md->first_buffer_stack;
1655 edesc_body.stack_idx = md->first_buffer_stack;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001656
1657 /* Egress all the edescs. */
1658 for (segment = 0; segment < sh->gso_segs; segment++) {
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001659 unsigned char *buf;
1660 unsigned int p_used = 0;
1661
1662 /* Egress the header. */
1663 buf = headers + (slot % EQUEUE_ENTRIES) * HEADER_BYTES +
1664 NET_IP_ALIGN;
1665 edesc_head.va = va_to_tile_io_addr(buf);
1666 gxio_mpipe_equeue_put_at(equeue, edesc_head, slot);
1667 slot++;
1668
1669 /* Egress the payload. */
1670 while (p_used < p_len) {
Chris Metcalf3da3fff2012-10-25 07:25:20 +00001671 void *va;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001672
1673 /* Advance as needed. */
1674 while (f_used >= f_size) {
1675 f_id++;
Chris Metcalf3da3fff2012-10-25 07:25:20 +00001676 f_size = skb_frag_size(&sh->frags[f_id]);
Chris Metcalf83885462012-07-11 14:08:21 -04001677 f_data = tile_net_frag_buf(&sh->frags[f_id]);
Chris Metcalf3da3fff2012-10-25 07:25:20 +00001678 f_used = 0;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001679 }
1680
Chris Metcalf3da3fff2012-10-25 07:25:20 +00001681 va = f_data + f_used;
1682
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001683 /* Use bytes from the current fragment. */
1684 n = p_len - p_used;
1685 if (n > f_size - f_used)
1686 n = f_size - f_used;
1687 f_used += n;
1688 p_used += n;
1689
1690 /* Egress a piece of the payload. */
Chris Metcalf3da3fff2012-10-25 07:25:20 +00001691 edesc_body.va = va_to_tile_io_addr(va);
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001692 edesc_body.xfer_size = n;
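			/* "bound" marks the last edesc of this segment's
			 * packet, as in the non-TSO path in tile_net_tx().
			 */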
1693 edesc_body.bound = !(p_used < p_len);
1694 gxio_mpipe_equeue_put_at(equeue, edesc_body, slot);
1695 slot++;
1696 }
1697
1698 tx_packets++;
1699 tx_bytes += sh_len + p_len;
1700
1701 /* The last segment may be less than gso_size. */
1702 data_len -= p_len;
1703 if (data_len < p_len)
1704 p_len = data_len;
1705 }
1706
1707 /* Update stats. */
Chris Metcalfad018182013-08-01 11:36:42 -04001708 tile_net_stats_add(tx_packets, &dev->stats.tx_packets);
1709 tile_net_stats_add(tx_bytes, &dev->stats.tx_bytes);
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001710}
1711
1712/* Do "TSO" handling for egress.
1713 *
1714 * Normally drivers set NETIF_F_TSO only to support hardware TSO;
1715 * otherwise the stack uses scatter-gather to implement GSO in software.
1716 * In our testing, enabling GSO support (via NETIF_F_SG) drops network
1717 * performance down to around 7.5 Gbps on the 10G interfaces, although
1718 * also dropping cpu utilization way down, to under 8%. But
1719 * implementing "TSO" in the driver brings performance back up to line
1720 * rate, while dropping cpu usage even further, to less than 4%. In
1721 * practice, profiling of GSO shows that skb_segment() is what causes
1722 * the performance overheads; we benefit in the driver from using
1723 * preallocated memory to duplicate the TCP/IP headers.
1724 */
1725static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev)
1726{
1727 struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
1728 struct tile_net_priv *priv = netdev_priv(dev);
1729 int channel = priv->echannel;
Chris Metcalff3286a32013-08-01 11:36:42 -04001730 int instance = priv->instance;
1731 struct mpipe_data *md = &mpipe_data[instance];
1732 struct tile_net_egress *egress = &md->egress_for_echannel[channel];
1733 struct tile_net_comps *comps =
1734 info->mpipe[instance].comps_for_echannel[channel];
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001735 gxio_mpipe_equeue_t *equeue = egress->equeue;
1736 unsigned long irqflags;
1737 int num_edescs;
1738 s64 slot;
1739
1740 /* Determine how many mpipe edesc's are needed. */
1741 num_edescs = tso_count_edescs(skb);
1742
1743 local_irq_save(irqflags);
1744
1745 /* Try to acquire a completion entry and an egress slot. */
Chris Metcalf9b4c3412012-07-01 14:43:47 -04001746 slot = tile_net_equeue_try_reserve(dev, skb->queue_mapping, comps,
1747 equeue, num_edescs);
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001748 if (slot < 0) {
1749 local_irq_restore(irqflags);
1750 return NETDEV_TX_BUSY;
1751 }
1752
1753 /* Set up copies of header data properly. */
1754 tso_headers_prepare(skb, egress->headers, slot);
1755
1756 /* Actually pass the data to the network hardware. */
1757 tso_egress(dev, equeue, skb, egress->headers, slot);
1758
1759 /* Add a completion record. */
1760 add_comp(equeue, comps, slot + num_edescs - 1, skb);
1761
1762 local_irq_restore(irqflags);
1763
1764 /* Make sure the egress timer is scheduled. */
1765 tile_net_schedule_egress_timer();
1766
1767 return NETDEV_TX_OK;
1768}
1769
1770/* Analyze the body and frags for a transmit request. */
1771static unsigned int tile_net_tx_frags(struct frag *frags,
1772 struct sk_buff *skb,
1773 void *b_data, unsigned int b_len)
1774{
1775 unsigned int i, n = 0;
1776
1777 struct skb_shared_info *sh = skb_shinfo(skb);
1778
1779 if (b_len != 0) {
1780 frags[n].buf = b_data;
1781 frags[n++].length = b_len;
1782 }
1783
1784 for (i = 0; i < sh->nr_frags; i++) {
1785 skb_frag_t *f = &sh->frags[i];
1786 frags[n].buf = tile_net_frag_buf(f);
1787 frags[n++].length = skb_frag_size(f);
1788 }
1789
1790 return n;
1791}
1792
1793/* Help the kernel transmit a packet. */
1794static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
1795{
1796 struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
1797 struct tile_net_priv *priv = netdev_priv(dev);
Chris Metcalff3286a32013-08-01 11:36:42 -04001798 int instance = priv->instance;
1799 struct mpipe_data *md = &mpipe_data[instance];
1800 struct tile_net_egress *egress =
1801 &md->egress_for_echannel[priv->echannel];
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001802 gxio_mpipe_equeue_t *equeue = egress->equeue;
1803 struct tile_net_comps *comps =
Chris Metcalff3286a32013-08-01 11:36:42 -04001804 info->mpipe[instance].comps_for_echannel[priv->echannel];
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001805 unsigned int len = skb->len;
1806 unsigned char *data = skb->data;
1807 unsigned int num_edescs;
1808 struct frag frags[MAX_FRAGS];
1809 gxio_mpipe_edesc_t edescs[MAX_FRAGS];
1810 unsigned long irqflags;
1811 gxio_mpipe_edesc_t edesc = { { 0 } };
1812 unsigned int i;
1813 s64 slot;
1814
1815 if (skb_is_gso(skb))
1816 return tile_net_tx_tso(skb, dev);
1817
1818 num_edescs = tile_net_tx_frags(frags, skb, data, skb_headlen(skb));
1819
1820 /* This is only used to specify the TLB. */
Chris Metcalff3286a32013-08-01 11:36:42 -04001821 edesc.stack_idx = md->first_buffer_stack;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001822
1823 /* Prepare the edescs. */
1824 for (i = 0; i < num_edescs; i++) {
1825 edesc.xfer_size = frags[i].length;
1826 edesc.va = va_to_tile_io_addr(frags[i].buf);
1827 edescs[i] = edesc;
1828 }
1829
1830 /* Mark the final edesc. */
1831 edescs[num_edescs - 1].bound = 1;
1832
1833 /* Add checksum info to the initial edesc, if needed. */
1834 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1835 unsigned int csum_start = skb_checksum_start_offset(skb);
1836 edescs[0].csum = 1;
1837 edescs[0].csum_start = csum_start;
1838 edescs[0].csum_dest = csum_start + skb->csum_offset;
1839 }
1840
1841 local_irq_save(irqflags);
1842
1843 /* Try to acquire a completion entry and an egress slot. */
Chris Metcalf9b4c3412012-07-01 14:43:47 -04001844 slot = tile_net_equeue_try_reserve(dev, skb->queue_mapping, comps,
1845 equeue, num_edescs);
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001846 if (slot < 0) {
1847 local_irq_restore(irqflags);
1848 return NETDEV_TX_BUSY;
1849 }
1850
1851 for (i = 0; i < num_edescs; i++)
1852 gxio_mpipe_equeue_put_at(equeue, edescs[i], slot++);
1853
1854 /* Add a completion record. */
1855 add_comp(equeue, comps, slot - 1, skb);
1856
1857	/* NOTE: Count at least ETH_ZLEN bytes for stats on short packets (e.g. 42 < 60). */
Chris Metcalfad018182013-08-01 11:36:42 -04001858 tile_net_stats_add(1, &dev->stats.tx_packets);
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001859 tile_net_stats_add(max_t(unsigned int, len, ETH_ZLEN),
Chris Metcalfad018182013-08-01 11:36:42 -04001860 &dev->stats.tx_bytes);
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001861
1862 local_irq_restore(irqflags);
1863
1864 /* Make sure the egress timer is scheduled. */
1865 tile_net_schedule_egress_timer();
1866
1867 return NETDEV_TX_OK;
1868}
1869
1870/* Return subqueue id on this core (one per core). */
1871static u16 tile_net_select_queue(struct net_device *dev, struct sk_buff *skb)
1872{
1873 return smp_processor_id();
1874}
1875
1876/* Deal with a transmit timeout. */
1877static void tile_net_tx_timeout(struct net_device *dev)
1878{
1879 int cpu;
1880
1881 for_each_online_cpu(cpu)
1882 netif_wake_subqueue(dev, cpu);
1883}
1884
1885/* Ioctl commands. */
1886static int tile_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
1887{
1888 return -EOPNOTSUPP;
1889}
1890
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001891/* Change the MTU. */
1892static int tile_net_change_mtu(struct net_device *dev, int new_mtu)
1893{
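	/* 68 is the minimum IPv4 MTU; the upper bound depends on whether
	 * jumbo packet buffers were configured (jumbo_num != 0).
	 */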
Chris Metcalf2628e8a2013-08-01 11:36:42 -04001894 if (new_mtu < 68)
1895 return -EINVAL;
1896 if (new_mtu > ((jumbo_num != 0) ? 9000 : 1500))
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001897 return -EINVAL;
1898 dev->mtu = new_mtu;
1899 return 0;
1900}
1901
1902/* Change the Ethernet address of the NIC.
1903 *
1904 * The hypervisor driver does not support changing the MAC address. However,
1905 * the hardware does not do anything with the MAC address, so the address
1906 * which gets used on outgoing packets, and which is accepted on incoming
1907 * packets, is completely up to us.
1908 *
1909 * Returns 0 on success, negative on failure.
1910 */
1911static int tile_net_set_mac_address(struct net_device *dev, void *p)
1912{
1913 struct sockaddr *addr = p;
1914
1915 if (!is_valid_ether_addr(addr->sa_data))
1916 return -EINVAL;
1917 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1918 return 0;
1919}
1920
1921#ifdef CONFIG_NET_POLL_CONTROLLER
1922/* Polling 'interrupt' - used by things like netconsole to send skbs
1923 * without having to re-enable interrupts. It's not called while
1924 * the interrupt routine is executing.
1925 */
1926static void tile_net_netpoll(struct net_device *dev)
1927{
Chris Metcalff3286a32013-08-01 11:36:42 -04001928 int instance = mpipe_instance(dev);
1929 struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
1930 struct mpipe_data *md = &mpipe_data[instance];
1931
1932 disable_percpu_irq(md->ingress_irq);
1933 napi_schedule(&info->mpipe[instance].napi);
1934 enable_percpu_irq(md->ingress_irq, 0);
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001935}
1936#endif
1937
1938static const struct net_device_ops tile_net_ops = {
1939 .ndo_open = tile_net_open,
1940 .ndo_stop = tile_net_stop,
1941 .ndo_start_xmit = tile_net_tx,
1942 .ndo_select_queue = tile_net_select_queue,
1943 .ndo_do_ioctl = tile_net_ioctl,
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001944 .ndo_change_mtu = tile_net_change_mtu,
1945 .ndo_tx_timeout = tile_net_tx_timeout,
1946 .ndo_set_mac_address = tile_net_set_mac_address,
1947#ifdef CONFIG_NET_POLL_CONTROLLER
1948 .ndo_poll_controller = tile_net_netpoll,
1949#endif
1950};
1951
1952/* The setup function.
1953 *
1954 * This uses ether_setup() to assign various fields in dev, including
1955 * setting IFF_BROADCAST and IFF_MULTICAST, then sets some extra fields.
1956 */
1957static void tile_net_setup(struct net_device *dev)
1958{
Chris Metcalfa8eaed52013-08-01 11:36:42 -04001959 netdev_features_t features = 0;
1960
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001961 ether_setup(dev);
1962 dev->netdev_ops = &tile_net_ops;
1963 dev->watchdog_timeo = TILE_NET_TIMEOUT;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001964 dev->mtu = 1500;
Chris Metcalfa8eaed52013-08-01 11:36:42 -04001965
1966 features |= NETIF_F_LLTX;
1967 features |= NETIF_F_HW_CSUM;
1968 features |= NETIF_F_SG;
1969 features |= NETIF_F_TSO;
Chris Metcalf2c7d04a2013-08-01 11:36:42 -04001970 features |= NETIF_F_TSO6;
Chris Metcalfa8eaed52013-08-01 11:36:42 -04001971
1972 dev->hw_features |= features;
1973 dev->vlan_features |= features;
1974 dev->features |= features;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00001975}
1976
1977/* Allocate the device structure, register the device, and obtain the
1978 * MAC address from the hypervisor.
1979 */
1980static void tile_net_dev_init(const char *name, const uint8_t *mac)
1981{
1982 int ret;
1983 int i;
1984 int nz_addr = 0;
1985 struct net_device *dev;
1986 struct tile_net_priv *priv;
1987
1988 /* HACK: Ignore "loop" links. */
1989 if (strncmp(name, "loop", 4) == 0)
1990 return;
1991
1992 /* Allocate the device structure. Normally, "name" is a
1993 * template, instantiated by register_netdev(), but not for us.
1994 */
1995 dev = alloc_netdev_mqs(sizeof(*priv), name, tile_net_setup,
1996 NR_CPUS, 1);
1997 if (!dev) {
1998 pr_err("alloc_netdev_mqs(%s) failed\n", name);
1999 return;
2000 }
2001
2002 /* Initialize "priv". */
2003 priv = netdev_priv(dev);
2004 memset(priv, 0, sizeof(*priv));
2005 priv->dev = dev;
2006 priv->channel = -1;
2007 priv->loopify_channel = -1;
2008 priv->echannel = -1;
2009
2010 /* Get the MAC address and set it in the device struct; this must
2011 * be done before the device is opened. If the MAC is all zeroes,
2012 * we use a random address, since we're probably on the simulator.
2013 */
2014 for (i = 0; i < 6; i++)
2015 nz_addr |= mac[i];
2016
2017 if (nz_addr) {
2018 memcpy(dev->dev_addr, mac, 6);
2019 dev->addr_len = 6;
2020 } else {
Chris Metcalfc8ab13f2012-07-18 12:23:06 -04002021 eth_hw_addr_random(dev);
Chris Metcalfe3d62d72012-06-07 10:45:02 +00002022 }
2023
2024 /* Register the network device. */
2025 ret = register_netdev(dev);
2026 if (ret) {
2027 netdev_err(dev, "register_netdev failed %d\n", ret);
2028 free_netdev(dev);
2029 return;
2030 }
2031}
2032
2033/* Per-cpu module initialization. */
2034static void tile_net_init_module_percpu(void *unused)
2035{
2036 struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
2037 int my_cpu = smp_processor_id();
Chris Metcalff3286a32013-08-01 11:36:42 -04002038 int instance;
Chris Metcalfe3d62d72012-06-07 10:45:02 +00002039
Chris Metcalff3286a32013-08-01 11:36:42 -04002040 for (instance = 0; instance < NR_MPIPE_MAX; instance++) {
2041 info->mpipe[instance].has_iqueue = false;
2042 info->mpipe[instance].instance = instance;
2043 }
Chris Metcalfe3d62d72012-06-07 10:45:02 +00002044 info->my_cpu = my_cpu;
2045
2046 /* Initialize the egress timer. */
2047 hrtimer_init(&info->egress_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2048 info->egress_timer.function = tile_net_handle_egress_timer;
2049}
2050
2051/* Module initialization. */
2052static int __init tile_net_init_module(void)
2053{
2054 int i;
2055 char name[GXIO_MPIPE_LINK_NAME_LEN];
2056 uint8_t mac[6];
2057
2058 pr_info("Tilera Network Driver\n");
2059
Chris Metcalff3286a32013-08-01 11:36:42 -04002060 BUILD_BUG_ON(NR_MPIPE_MAX != 2);
2061
Chris Metcalfe3d62d72012-06-07 10:45:02 +00002062 mutex_init(&tile_net_devs_for_channel_mutex);
2063
2064 /* Initialize each CPU. */
2065 on_each_cpu(tile_net_init_module_percpu, NULL, 1);
2066
2067 /* Find out what devices we have, and initialize them. */
2068 for (i = 0; gxio_mpipe_link_enumerate_mac(i, name, mac) >= 0; i++)
2069 tile_net_dev_init(name, mac);
2070
2071 if (!network_cpus_init())
2072 network_cpus_map = *cpu_online_mask;
2073
2074 return 0;
2075}
2076
2077module_init(tile_net_init_module);