blob: 178e2937bbaaa71126a29a763be5eb7fa87e60d6 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * PACKET - implements raw packet sockets.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 *
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090012 * Fixes:
Linus Torvalds1da177e2005-04-16 15:20:36 -070013 * Alan Cox : verify_area() now used correctly
14 * Alan Cox : new skbuff lists, look ma no backlogs!
15 * Alan Cox : tidied skbuff lists.
16 * Alan Cox : Now uses generic datagram routines I
17 * added. Also fixed the peek/read crash
18 * from all old Linux datagram code.
19 * Alan Cox : Uses the improved datagram code.
20 * Alan Cox : Added NULL's for socket options.
21 * Alan Cox : Re-commented the code.
22 * Alan Cox : Use new kernel side addressing
23 * Rob Janssen : Correct MTU usage.
24 * Dave Platt : Counter leaks caused by incorrect
25 * interrupt locking and some slightly
26 * dubious gcc output. Can you read
27 * compiler: it said _VOLATILE_
28 * Richard Kooijman : Timestamp fixes.
29 * Alan Cox : New buffers. Use sk->mac.raw.
30 * Alan Cox : sendmsg/recvmsg support.
31 * Alan Cox : Protocol setting support
32 * Alexey Kuznetsov : Untied from IPv4 stack.
33 * Cyrus Durgin : Fixed kerneld for kmod.
34 * Michal Ostrowski : Module initialization cleanup.
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090035 * Ulises Alonso : Frame number limit removal and
Linus Torvalds1da177e2005-04-16 15:20:36 -070036 * packet_set_ring memory leak.
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070037 * Eric Biederman : Allow for > 8 byte hardware addresses.
38 * The convention is that longer addresses
39 * will simply extend the hardware address
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090040 * byte arrays at the end of sockaddr_ll
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070041 * and packet_mreq.
Johann Baudy69e3c752009-05-18 22:11:22 -070042 * Johann Baudy : Added TX RING.
Linus Torvalds1da177e2005-04-16 15:20:36 -070043 *
44 * This program is free software; you can redistribute it and/or
45 * modify it under the terms of the GNU General Public License
46 * as published by the Free Software Foundation; either version
47 * 2 of the License, or (at your option) any later version.
48 *
49 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090050
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <linux/mm.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080053#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070054#include <linux/fcntl.h>
55#include <linux/socket.h>
56#include <linux/in.h>
57#include <linux/inet.h>
58#include <linux/netdevice.h>
59#include <linux/if_packet.h>
60#include <linux/wireless.h>
Herbert Xuffbc6112007-02-04 23:33:10 -080061#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070062#include <linux/kmod.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020063#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070064#include <net/ip.h>
65#include <net/protocol.h>
66#include <linux/skbuff.h>
67#include <net/sock.h>
68#include <linux/errno.h>
69#include <linux/timer.h>
70#include <asm/system.h>
71#include <asm/uaccess.h>
72#include <asm/ioctls.h>
73#include <asm/page.h>
Al Viroa1f8e7f72006-10-19 16:08:53 -040074#include <asm/cacheflush.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070075#include <asm/io.h>
76#include <linux/proc_fs.h>
77#include <linux/seq_file.h>
78#include <linux/poll.h>
79#include <linux/module.h>
80#include <linux/init.h>
Herbert Xu905db442009-01-30 14:12:06 -080081#include <linux/mutex.h>
Eric Dumazet05423b22009-10-26 18:40:35 -070082#include <linux/if_vlan.h>
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -080083#include <linux/virtio_net.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85#ifdef CONFIG_INET
86#include <net/inet_common.h>
87#endif
88
Linus Torvalds1da177e2005-04-16 15:20:36 -070089/*
Linus Torvalds1da177e2005-04-16 15:20:36 -070090 Assumptions:
91 - if device has no dev->hard_header routine, it adds and removes ll header
92 inside itself. In this case ll header is invisible outside of device,
93 but higher levels still should reserve dev->hard_header_len.
94 Some devices are enough clever to reallocate skb, when header
95 will not fit to reserved space (tunnel), another ones are silly
96 (PPP).
97 - packet socket receives packets with pulled ll header,
98 so that SOCK_RAW should push it back.
99
100On receive:
101-----------
102
103Incoming, dev->hard_header!=NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700104 mac_header -> ll header
105 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106
107Outgoing, dev->hard_header!=NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700108 mac_header -> ll header
109 data -> ll header
Linus Torvalds1da177e2005-04-16 15:20:36 -0700110
111Incoming, dev->hard_header==NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700112 mac_header -> UNKNOWN position. It is very likely, that it points to ll
113 header. PPP makes it, that is wrong, because introduce
YOSHIFUJI Hideakidb0c58f2007-07-19 10:44:35 +0900114 assymetry between rx and tx paths.
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700115 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116
117Outgoing, dev->hard_header==NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700118 mac_header -> data. ll header is still not built!
119 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120
121Resume
122 If dev->hard_header==NULL we are unlikely to restore sensible ll header.
123
124
125On transmit:
126------------
127
128dev->hard_header != NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700129 mac_header -> ll header
130 data -> ll header
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131
132dev->hard_header == NULL (ll header is added by device, we cannot control it)
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700133 mac_header -> data
134 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135
136 We should set nh.raw on output to correct posistion,
137 packet classifier depends on it.
138 */
139
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140/* Private packet socket structures. */
141
/*
 * One multicast/promiscuous membership held by a packet socket,
 * kept on a singly linked per-socket list (packet_sock->mclist).
 */
struct packet_mclist {
	struct packet_mclist	*next;		/* next membership on this socket */
	int			ifindex;	/* device the membership applies to */
	int			count;		/* reference count for duplicate joins */
	unsigned short		type;		/* PACKET_MR_* membership type */
	unsigned short		alen;		/* valid length of addr[] */
	unsigned char		addr[MAX_ADDR_LEN];	/* hardware address */
};
/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max {
	int		mr_ifindex;		/* interface index */
	unsigned short	mr_type;		/* PACKET_MR_* membership type */
	unsigned short	mr_alen;		/* valid length of mr_address[] */
	unsigned char	mr_address[MAX_ADDR_LEN];	/* hardware address */
};
David S. Millera2efcfa2007-05-29 13:12:50 -0700159
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160#ifdef CONFIG_PACKET_MMAP
Johann Baudy69e3c752009-05-18 22:11:22 -0700161static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
162 int closing, int tx_ring);
163
/*
 * State for one mmap()ed ring (rx or tx) of a packet socket.
 * Frames live in the page vector pg_vec; a frame is addressed by
 * (position / frames_per_block, position % frames_per_block).
 */
struct packet_ring_buffer {
	char			**pg_vec;	/* array of per-block base addresses */
	unsigned int		head;		/* next frame index to use */
	unsigned int		frames_per_block;
	unsigned int		frame_size;	/* bytes per frame */
	unsigned int		frame_max;	/* highest valid frame index */

	unsigned int		pg_vec_order;	/* page allocation order per block */
	unsigned int		pg_vec_pages;	/* pages per block */
	unsigned int		pg_vec_len;	/* number of blocks in pg_vec */

	atomic_t		pending;	/* in-flight tx frames (tx ring) */
};
177
178struct packet_sock;
179static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180#endif
181
182static void packet_flush_mclist(struct sock *sk);
183
/* Private state of an AF_PACKET socket. */
struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct tpacket_stats	stats;		/* tp_packets/tp_drops counters */
#ifdef CONFIG_PACKET_MMAP
	struct packet_ring_buffer	rx_ring;
	struct packet_ring_buffer	tx_ring;
	int			copy_thresh;	/* queue a copy for small frames */
#endif
	spinlock_t		bind_lock;	/* protects prot_hook (un)registration */
	struct mutex		pg_vec_lock;	/* serializes ring setup vs. mmap */
	unsigned int		running:1,	/* prot_hook is attached */
				auxdata:1,	/* deliver PACKET_AUXDATA cmsg */
				origdev:1,	/* report original ingress device */
				has_vnet_hdr:1;	/* virtio_net header on packets */
	int			ifindex;	/* bound device */
	__be16			num;		/* bound protocol number */
	struct packet_mclist	*mclist;	/* multicast memberships */
#ifdef CONFIG_PACKET_MMAP
	atomic_t		mapped;		/* active mmap() mappings of the rings */
	enum tpacket_versions	tp_version;	/* TPACKET_V1 or TPACKET_V2 */
	unsigned int		tp_hdrlen;	/* frame header length for tp_version */
	unsigned int		tp_reserve;	/* user-requested headroom in frames */
	unsigned int		tp_loss:1;	/* discard failed tx frames */
#endif
	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
};
211
/*
 * Per-skb state kept in skb->cb while an skb sits on a packet socket's
 * receive queue: the original (pre-trim) length plus the address that
 * recvmsg will return.  Accessed via PACKET_SKB_CB().
 */
struct packet_skb_cb {
	unsigned int origlen;		/* skb->len before pskb_trim to snaplen */
	union {
		struct sockaddr_pkt pkt;	/* SOCK_PACKET address */
		struct sockaddr_ll ll;		/* AF_PACKET address */
	} sa;
};
219
220#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
Herbert Xu8dc41942007-02-04 23:31:32 -0800221
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222#ifdef CONFIG_PACKET_MMAP
223
/*
 * Publish the status word of a ring frame to user space.
 * The frame layout depends on the negotiated TPACKET version, hence the
 * union.  The dcache flush makes the store visible on architectures with
 * incoherent caches (the ring is mmap()ed by user space); the trailing
 * smp_wmb() orders the frame contents before the status hand-off.
 */
static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(virt_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(virt_to_page(&h.h2->tp_status));
		break;
	default:
		/* tp_version is validated at setsockopt time; anything else is a bug */
		pr_err("TPACKET version not supported\n");
		BUG();
	}

	smp_wmb();
}
249
/*
 * Read the status word of a ring frame.  Counterpart of
 * __packet_set_status(): smp_rmb() pairs with its smp_wmb(), and the
 * dcache flush picks up user-space stores on incoherent-cache machines.
 */
static int __packet_get_status(struct packet_sock *po, void *frame)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	smp_rmb();

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(virt_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(virt_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	default:
		/* tp_version is validated at setsockopt time; anything else is a bug */
		pr_err("TPACKET version not supported\n");
		BUG();
		return 0;
	}
}
Johann Baudy69e3c752009-05-18 22:11:22 -0700274
275static void *packet_lookup_frame(struct packet_sock *po,
276 struct packet_ring_buffer *rb,
277 unsigned int position,
278 int status)
279{
280 unsigned int pg_vec_pos, frame_offset;
281 union {
282 struct tpacket_hdr *h1;
283 struct tpacket2_hdr *h2;
284 void *raw;
285 } h;
286
287 pg_vec_pos = position / rb->frames_per_block;
288 frame_offset = position % rb->frames_per_block;
289
290 h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);
291
292 if (status != __packet_get_status(po, h.raw))
293 return NULL;
294
295 return h.raw;
296}
297
/* Frame at the ring's current head position, or NULL if its status
 * does not match @status. */
static inline void *packet_current_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	return packet_lookup_frame(po, rb, rb->head, status);
}
304
305static inline void *packet_previous_frame(struct packet_sock *po,
306 struct packet_ring_buffer *rb,
307 int status)
308{
309 unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
310 return packet_lookup_frame(po, rb, previous, status);
311}
312
313static inline void packet_increment_head(struct packet_ring_buffer *buff)
314{
315 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
316}
317
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318#endif
319
/* Downcast a struct sock to its containing packet_sock; valid because
 * struct sock is the first member of struct packet_sock. */
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}
324
/*
 * sk->sk_destruct callback: sanity-check that the socket is really dead
 * and holds no queued memory before its refcount accounting is dropped.
 */
static void packet_sock_destruct(struct sock *sk)
{
	/* All queued skbs should have been purged by now */
	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	sk_refcnt_debug_dec(sk);
}
337
338
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800339static const struct proto_ops packet_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800341static const struct proto_ops packet_ops_spkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342
/*
 * Receive handler for SOCK_PACKET sockets.  Invoked from the protocol
 * dispatch with the owning socket stashed in pt->af_packet_priv.
 * Queues a copy of every matching frame on the socket's receive queue,
 * filling in a sockaddr_pkt in the skb control block.  Always returns 0;
 * the skb is either queued or freed here.
 */
static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have ll header pulled,
	 *	push it back.
	 *
	 *	For outgoing ones skb->data == skb_mac_header(skb)
	 *	so that this procedure is noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	/* only deliver within the socket's network namespace */
	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto out;

	/* we are about to mangle data/len/cb: get a private copy if shared */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto oom;

	/* drop any routing info */
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}
408
409
410/*
411 * Output a raw packet to a device layer. This bypasses all the other
412 * protocol layers and you must therefore supply it with a complete frame
413 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900414
/*
 * sendmsg() for SOCK_PACKET sockets: transmit one raw frame, built
 * entirely by the caller, on the device named in the sockaddr_pkt.
 *
 * The device lookup runs under rcu_read_lock().  Because the skb must
 * be allocated with GFP_KERNEL (may sleep), the first pass drops the
 * RCU lock to allocate, then jumps back to 'retry' to re-validate the
 * device before actually queueing the frame.
 *
 * Returns @len on success or a negative errno.
 */
static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb = NULL;
	struct net_device *dev;
	__be16 proto = 0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	/* force NUL termination of the user-supplied device name */
	saddr->spkt_device[13] = 0;
retry:
	rcu_read_lock();
	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	if (!skb) {
		size_t reserved = LL_RESERVED_SPACE(dev);
		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;

		/* drop RCU before a sleeping allocation, then re-validate */
		rcu_read_unlock();
		skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
		if (skb == NULL)
			return -ENOBUFS;
		/* FIXME: Save some space for broken drivers that write a hard
		 * header at transmission time by themselves. PPP is the notable
		 * one here. This should really be fixed at the driver level.
		 */
		skb_reserve(skb, reserved);
		skb_reset_network_header(skb);

		/* Try to align data part correctly */
		if (hhlen) {
			skb->data -= hhlen;
			skb->tail -= hhlen;
			if (len < hhlen)
				skb_reset_network_header(skb);
		}
		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
		if (err)
			goto out_free;
		goto retry;
	}


	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	dev_queue_xmit(skb);
	rcu_read_unlock();
	return len;

out_unlock:
	rcu_read_unlock();
out_free:
	kfree_skb(skb);	/* kfree_skb(NULL) is a no-op on the pre-alloc path */
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506
/*
 * Run the socket's attached BPF filter (if any) over @skb.
 * @res is the default snap length; the filter's return value replaces
 * it (0 means "drop").  The filter pointer is RCU-protected, hence the
 * rcu_read_lock_bh() critical section around the dereference.
 */
static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock_bh();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		res = sk_run_filter(skb, filter->insns, filter->len);
	rcu_read_unlock_bh();

	return res;
}
520
521/*
522 This function makes lazy skb cloning in hope that most of packets
523 are discarded by BPF.
524
525 Note tricky part: we DO mangle shared skb! skb->data, skb->len
526 and skb->cb are mangled. It works because (and until) packets
527 falling here are owned by current CPU. Output packets are cloned
528 by dev_queue_xmit_nit(), input packets are processed by net_bh
529 sequencially, so that if we return skb to original state on exit,
530 we will not harm anyone.
531 */
532
/*
   This function makes lazy skb cloning in hope that most of packets
   are discarded by BPF.

   Note tricky part: we DO mangle shared skb! skb->data, skb->len
   and skb->cb are mangled. It works because (and until) packets
   falling here are owned by current CPU. Output packets are cloned
   by dev_queue_xmit_nit(), input packets are processed by net_bh
   sequencially, so that if we return skb to original state on exit,
   we will not harm anyone.
 */

/*
 * Receive handler for AF_PACKET (non-mmap) sockets: run the BPF filter,
 * lazily clone shared skbs, fill in a sockaddr_ll in the control block,
 * trim to the snap length and queue on the socket.  skb_head/skb_len
 * remember the original data pointer/length so a shared skb can be
 * restored before returning on the drop paths.  Always returns 0.
 */
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;	/* saved for drop_n_restore */
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	/* only deliver within the socket's network namespace */
	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	skb->dev = dev;

	if (dev->header_ops) {
		/* The device has an explicit notion of ll header,
		   exported to higher levels.

		   Otherwise, the device hides datails of it frame
		   structure, so that corresponding packet head
		   never delivered to user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;	/* filter said drop */
	if (snaplen > res)
		snaplen = res;		/* filter shrank the capture length */

	/* enforce the receive buffer limit before committing memory */
	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		/* restore the shared original before releasing our ref */
		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	/* sockaddr_ll (with a full MAX_ADDR_LEN address) must fit in skb->cb */
	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	/* remember the pre-trim length for PACKET_AUXDATA */
	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

drop_n_restore:
	/* undo our mangling of a shared skb before returning it */
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	consume_skb(skb);
	return 0;
}
642
643#ifdef CONFIG_PACKET_MMAP
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000644static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
645 struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646{
647 struct sock *sk;
648 struct packet_sock *po;
649 struct sockaddr_ll *sll;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700650 union {
651 struct tpacket_hdr *h1;
652 struct tpacket2_hdr *h2;
653 void *raw;
654 } h;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000655 u8 *skb_head = skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700656 int skb_len = skb->len;
David S. Millerdbcb5852007-01-24 15:21:02 -0800657 unsigned int snaplen, res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700659 unsigned short macoff, netoff, hdrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660 struct sk_buff *copy_skb = NULL;
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -0700661 struct timeval tv;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700662 struct timespec ts;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700663
664 if (skb->pkt_type == PACKET_LOOPBACK)
665 goto drop;
666
667 sk = pt->af_packet_priv;
668 po = pkt_sk(sk);
669
Octavian Purdila09ad9bc2009-11-25 15:14:13 -0800670 if (!net_eq(dev_net(dev), sock_net(sk)))
Denis V. Lunevd12d01d2007-11-19 22:28:35 -0800671 goto drop;
672
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700673 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 if (sk->sk_type != SOCK_DGRAM)
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700675 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676 else if (skb->pkt_type == PACKET_OUTGOING) {
677 /* Special case: outgoing packets have ll header at head */
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300678 skb_pull(skb, skb_network_offset(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679 }
680 }
681
Herbert Xu8dc41942007-02-04 23:31:32 -0800682 if (skb->ip_summed == CHECKSUM_PARTIAL)
683 status |= TP_STATUS_CSUMNOTREADY;
684
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 snaplen = skb->len;
686
David S. Millerdbcb5852007-01-24 15:21:02 -0800687 res = run_filter(skb, sk, snaplen);
688 if (!res)
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700689 goto drop_n_restore;
David S. Millerdbcb5852007-01-24 15:21:02 -0800690 if (snaplen > res)
691 snaplen = res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692
693 if (sk->sk_type == SOCK_DGRAM) {
Patrick McHardy89133362008-07-18 18:05:19 -0700694 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
695 po->tp_reserve;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696 } else {
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300697 unsigned maclen = skb_network_offset(skb);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700698 netoff = TPACKET_ALIGN(po->tp_hdrlen +
Patrick McHardy89133362008-07-18 18:05:19 -0700699 (maclen < 16 ? 16 : maclen)) +
700 po->tp_reserve;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701 macoff = netoff - maclen;
702 }
703
Johann Baudy69e3c752009-05-18 22:11:22 -0700704 if (macoff + snaplen > po->rx_ring.frame_size) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705 if (po->copy_thresh &&
706 atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
707 (unsigned)sk->sk_rcvbuf) {
708 if (skb_shared(skb)) {
709 copy_skb = skb_clone(skb, GFP_ATOMIC);
710 } else {
711 copy_skb = skb_get(skb);
712 skb_head = skb->data;
713 }
714 if (copy_skb)
715 skb_set_owner_r(copy_skb, sk);
716 }
Johann Baudy69e3c752009-05-18 22:11:22 -0700717 snaplen = po->rx_ring.frame_size - macoff;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700718 if ((int)snaplen < 0)
719 snaplen = 0;
720 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721
722 spin_lock(&sk->sk_receive_queue.lock);
Johann Baudy69e3c752009-05-18 22:11:22 -0700723 h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700724 if (!h.raw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725 goto ring_is_full;
Johann Baudy69e3c752009-05-18 22:11:22 -0700726 packet_increment_head(&po->rx_ring);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727 po->stats.tp_packets++;
728 if (copy_skb) {
729 status |= TP_STATUS_COPY;
730 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
731 }
732 if (!po->stats.tp_drops)
733 status &= ~TP_STATUS_LOSING;
734 spin_unlock(&sk->sk_receive_queue.lock);
735
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700736 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700738 switch (po->tp_version) {
739 case TPACKET_V1:
740 h.h1->tp_len = skb->len;
741 h.h1->tp_snaplen = snaplen;
742 h.h1->tp_mac = macoff;
743 h.h1->tp_net = netoff;
744 if (skb->tstamp.tv64)
745 tv = ktime_to_timeval(skb->tstamp);
746 else
747 do_gettimeofday(&tv);
748 h.h1->tp_sec = tv.tv_sec;
749 h.h1->tp_usec = tv.tv_usec;
750 hdrlen = sizeof(*h.h1);
751 break;
752 case TPACKET_V2:
753 h.h2->tp_len = skb->len;
754 h.h2->tp_snaplen = snaplen;
755 h.h2->tp_mac = macoff;
756 h.h2->tp_net = netoff;
757 if (skb->tstamp.tv64)
758 ts = ktime_to_timespec(skb->tstamp);
759 else
760 getnstimeofday(&ts);
761 h.h2->tp_sec = ts.tv_sec;
762 h.h2->tp_nsec = ts.tv_nsec;
Eric Dumazet05423b22009-10-26 18:40:35 -0700763 h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700764 hdrlen = sizeof(*h.h2);
765 break;
766 default:
767 BUG();
768 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700770 sll = h.raw + TPACKET_ALIGN(hdrlen);
Stephen Hemmingerb95cce32007-09-26 22:13:38 -0700771 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772 sll->sll_family = AF_PACKET;
773 sll->sll_hatype = dev->type;
774 sll->sll_protocol = skb->protocol;
775 sll->sll_pkttype = skb->pkt_type;
Peter P Waskiewicz Jr8032b462007-11-10 22:03:25 -0800776 if (unlikely(po->origdev))
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700777 sll->sll_ifindex = orig_dev->ifindex;
778 else
779 sll->sll_ifindex = dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700781 __packet_set_status(po, h.raw, status);
Ralf Baechlee16aa202006-12-07 00:11:33 -0800782 smp_mb();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783 {
784 struct page *p_start, *p_end;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700785 u8 *h_end = h.raw + macoff + snaplen - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700787 p_start = virt_to_page(h.raw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788 p_end = virt_to_page(h_end);
789 while (p_start <= p_end) {
790 flush_dcache_page(p_start);
791 p_start++;
792 }
793 }
794
795 sk->sk_data_ready(sk, 0);
796
797drop_n_restore:
798 if (skb_head != skb->data && skb_shared(skb)) {
799 skb->data = skb_head;
800 skb->len = skb_len;
801 }
802drop:
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900803 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804 return 0;
805
806ring_is_full:
807 po->stats.tp_drops++;
808 spin_unlock(&sk->sk_receive_queue.lock);
809
810 sk->sk_data_ready(sk, 0);
Wei Yongjunacb5d752009-02-25 00:36:42 +0000811 kfree_skb(copy_skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700812 goto drop_n_restore;
813}
814
Johann Baudy69e3c752009-05-18 22:11:22 -0700815static void tpacket_destruct_skb(struct sk_buff *skb)
816{
817 struct packet_sock *po = pkt_sk(skb->sk);
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000818 void *ph;
Johann Baudy69e3c752009-05-18 22:11:22 -0700819
820 BUG_ON(skb == NULL);
821
822 if (likely(po->tx_ring.pg_vec)) {
823 ph = skb_shinfo(skb)->destructor_arg;
824 BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
825 BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
826 atomic_dec(&po->tx_ring.pending);
827 __packet_set_status(po, ph, TP_STATUS_AVAILABLE);
828 }
829
830 sock_wfree(skb);
831}
832
/*
 * Build an skb that points (zero-copy, via page frags) at one TX_RING
 * frame.  Returns the frame's payload length (tp_len) on success or a
 * negative errno.  The frame pointer is stored in destructor_arg so
 * tpacket_destruct_skb() can release the slot later.
 */
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
		void *frame, struct net_device *dev, int size_max,
		__be16 proto, unsigned char *addr)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} ph;
	int to_write, offset, len, tp_len, nr_frags, len_max;
	struct socket *sock = po->sk.sk_socket;
	struct page *page;
	void *data;
	int err;

	ph.raw = frame;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = po->sk.sk_priority;
	skb->mark = po->sk.sk_mark;
	/* Remembered for tpacket_destruct_skb(). */
	skb_shinfo(skb)->destructor_arg = ph.raw;

	/* Header layout differs per ring version; only tp_len is needed. */
	switch (po->tp_version) {
	case TPACKET_V2:
		tp_len = ph.h2->tp_len;
		break;
	default:
		tp_len = ph.h1->tp_len;
		break;
	}
	if (unlikely(tp_len > size_max)) {
		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
		return -EMSGSIZE;
	}

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	/* Payload starts right after the frame header + sockaddr_ll area. */
	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
	to_write = tp_len;

	if (sock->type == SOCK_DGRAM) {
		/* SOCK_DGRAM: kernel constructs the link-layer header. */
		err = dev_hard_header(skb, dev, ntohs(proto), addr,
				NULL, tp_len);
		if (unlikely(err < 0))
			return -EINVAL;
	} else if (dev->hard_header_len) {
		/* net device doesn't like empty head */
		if (unlikely(tp_len <= dev->hard_header_len)) {
			pr_err("packet size is too short (%d < %d)\n",
			       tp_len, dev->hard_header_len);
			return -EINVAL;
		}

		/*
		 * SOCK_RAW: the hard header is copied into the linear
		 * area; only the remainder is attached as page frags.
		 */
		skb_push(skb, dev->hard_header_len);
		err = skb_store_bits(skb, 0, data,
				dev->hard_header_len);
		if (unlikely(err))
			return err;

		data += dev->hard_header_len;
		to_write -= dev->hard_header_len;
	}

	err = -EFAULT;
	page = virt_to_page(data);
	offset = offset_in_page(data);
	len_max = PAGE_SIZE - offset;
	len = ((to_write > len_max) ? len_max : to_write);

	/* Account the payload against the socket's write memory. */
	skb->data_len = to_write;
	skb->len += to_write;
	skb->truesize += to_write;
	atomic_add(to_write, &po->sk.sk_wmem_alloc);

	/*
	 * Attach the ring frame page by page; a frame may span several
	 * physically contiguous pages of the pg_vec block.
	 */
	while (likely(to_write)) {
		nr_frags = skb_shinfo(skb)->nr_frags;

		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("Packet exceed the number of skb frags(%lu)\n",
			       MAX_SKB_FRAGS);
			return -EFAULT;
		}

		flush_dcache_page(page);
		get_page(page);
		skb_fill_page_desc(skb,
				nr_frags,
				page++, offset, len);
		to_write -= len;
		offset = 0;
		len_max = PAGE_SIZE;
		len = ((to_write > len_max) ? len_max : to_write);
	}

	return tp_len;
}
931
/*
 * Transmit path for PACKET_TX_RING: walk the TX ring, wrap each frame
 * marked TP_STATUS_SEND_REQUEST in an skb and hand it to the device.
 * Returns the total number of payload bytes queued, or a negative errno.
 * Serialized against ring reconfiguration by pg_vec_lock.
 */
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
	struct socket *sock;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	int ifindex, err, reserve = 0;
	void *ph;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;
	unsigned char *addr;
	int len_sum = 0;
	int status = 0;

	sock = po->sk.sk_socket;

	mutex_lock(&po->pg_vec_lock);

	err = -EBUSY;
	if (saddr == NULL) {
		/* No explicit address: use the socket's bound device/proto. */
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen
					+ offsetof(struct sockaddr_ll,
						sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
	err = -ENXIO;
	if (unlikely(dev == NULL))
		goto out;

	reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (unlikely(!(dev->flags & IFF_UP)))
		goto out_put;

	/* Largest payload a ring frame can carry after its header area. */
	size_max = po->tx_ring.frame_size
		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));

	if (size_max > dev->mtu + reserve)
		size_max = dev->mtu + reserve;

	do {
		ph = packet_current_frame(po, &po->tx_ring,
				TP_STATUS_SEND_REQUEST);

		if (unlikely(ph == NULL)) {
			/* No frame ready; yield and re-check (loop exit
			 * condition below decides whether to keep waiting). */
			schedule();
			continue;
		}

		status = TP_STATUS_SEND_REQUEST;
		skb = sock_alloc_send_skb(&po->sk,
				LL_ALLOCATED_SPACE(dev)
				+ sizeof(struct sockaddr_ll),
				0, &err);

		if (unlikely(skb == NULL))
			goto out_status;

		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
				addr);

		if (unlikely(tp_len < 0)) {
			if (po->tp_loss) {
				/* PACKET_LOSS: silently drop the bad frame
				 * and keep going. */
				__packet_set_status(po, ph,
						TP_STATUS_AVAILABLE);
				packet_increment_head(&po->tx_ring);
				kfree_skb(skb);
				continue;
			} else {
				/* Report the malformed frame to user space
				 * and abort the send. */
				status = TP_STATUS_WRONG_FORMAT;
				err = tp_len;
				goto out_status;
			}
		}

		skb->destructor = tpacket_destruct_skb;
		__packet_set_status(po, ph, TP_STATUS_SENDING);
		atomic_inc(&po->tx_ring.pending);

		status = TP_STATUS_SEND_REQUEST;
		err = dev_queue_xmit(skb);
		if (unlikely(err > 0)) {
			err = net_xmit_errno(err);
			if (err && __packet_get_status(po, ph) ==
				   TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
				skb = NULL;
				goto out_status;
			}
			/*
			 * skb was dropped but not destructed yet;
			 * let's treat it like congestion or err < 0
			 */
			err = 0;
		}
		packet_increment_head(&po->tx_ring);
		len_sum += tp_len;
	} while (likely((ph != NULL) ||
			((!(msg->msg_flags & MSG_DONTWAIT)) &&
			 (atomic_read(&po->tx_ring.pending))))
		);

	err = len_sum;
	goto out_put;

out_status:
	__packet_set_status(po, ph, status);
	kfree_skb(skb);
out_put:
	dev_put(dev);
out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059#endif
1060
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001061static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
1062 size_t reserve, size_t len,
1063 size_t linear, int noblock,
1064 int *err)
1065{
1066 struct sk_buff *skb;
1067
1068 /* Under a page? Don't bother with paged skb. */
1069 if (prepad + len < PAGE_SIZE || !linear)
1070 linear = len;
1071
1072 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
1073 err);
1074 if (!skb)
1075 return NULL;
1076
1077 skb_reserve(skb, reserve);
1078 skb_put(skb, linear);
1079 skb->data_len = len - linear;
1080 skb->len += len - linear;
1081
1082 return skb;
1083}
1084
/*
 * Ordinary (non-ring) transmit path: copy one datagram from user space
 * into an skb and queue it on the chosen device.  Supports an optional
 * leading virtio_net_hdr (PACKET_VNET_HDR) describing checksum/GSO
 * offload.  Returns bytes sent (including the vnet header, if any) or
 * a negative errno.
 */
static int packet_snd(struct socket *sock,
		      struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int offset = 0;
	int vnet_hdr_len;
	struct packet_sock *po = pkt_sk(sk);
	unsigned short gso_type = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		/* Unaddressed send: fall back to the socket's binding. */
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}


	dev = dev_get_by_index(sock_net(sk), ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	/* SOCK_RAW supplies its own link-layer header; leave room for it. */
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	if (po->has_vnet_hdr) {
		/* The user payload is prefixed by a virtio_net_hdr. */
		vnet_hdr_len = sizeof(vnet_hdr);

		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto out_unlock;

		len -= vnet_hdr_len;

		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
				       vnet_hdr_len);
		if (err < 0)
			goto out_unlock;

		/* hdr_len must at least cover the checksum fields. */
		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
		     vnet_hdr.hdr_len))
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
						 vnet_hdr.csum_offset + 2;

		err = -EINVAL;
		if (vnet_hdr.hdr_len > len)
			goto out_unlock;

		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
			/* Translate virtio GSO types to SKB_GSO_* flags. */
			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
			case VIRTIO_NET_HDR_GSO_TCPV4:
				gso_type = SKB_GSO_TCPV4;
				break;
			case VIRTIO_NET_HDR_GSO_TCPV6:
				gso_type = SKB_GSO_TCPV6;
				break;
			case VIRTIO_NET_HDR_GSO_UDP:
				gso_type = SKB_GSO_UDP;
				break;
			default:
				goto out_unlock;
			}

			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
				gso_type |= SKB_GSO_TCP_ECN;

			if (vnet_hdr.gso_size == 0)
				goto out_unlock;

		}
	}

	/* GSO frames may legitimately exceed the MTU. */
	err = -EMSGSIZE;
	if (!gso_type && (len > dev->mtu+reserve))
		goto out_unlock;

	err = -ENOBUFS;
	skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
			       LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
			       msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_set_network_header(skb, reserve);

	err = -EINVAL;
	if (sock->type == SOCK_DGRAM &&
	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
	if (err)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	if (po->has_vnet_hdr) {
		/* Apply the offload hints carried in the vnet header. */
		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
						  vnet_hdr.csum_offset)) {
				err = -EINVAL;
				goto out_free;
			}
		}

		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;

		/* Report the vnet header as part of the bytes sent. */
		len += vnet_hdr_len;
	}

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}
1247
Johann Baudy69e3c752009-05-18 22:11:22 -07001248static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
1249 struct msghdr *msg, size_t len)
1250{
1251#ifdef CONFIG_PACKET_MMAP
1252 struct sock *sk = sock->sk;
1253 struct packet_sock *po = pkt_sk(sk);
1254 if (po->tx_ring.pg_vec)
1255 return tpacket_snd(po, msg);
1256 else
1257#endif
1258 return packet_snd(sock, msg, len);
1259}
1260
Linus Torvalds1da177e2005-04-16 15:20:36 -07001261/*
1262 * Close a PACKET socket. This is fairly simple. We immediately go
1263 * to 'closed' state and remove our protocol entry in the device list.
1264 */
1265
/*
 * Release an AF_PACKET socket: unlink it from the per-net socket list,
 * detach the protocol hook, free any mapped rings, and drop the final
 * reference.  Ordering matters: the hook must be removed before the
 * rings are torn down so no new frames land in freed memory.
 */
static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
#ifdef CONFIG_PACKET_MMAP
	struct tpacket_req req;
#endif

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	write_lock_bh(&net->packet.sklist_lock);
	sk_del_node_init(sk);
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	write_unlock_bh(&net->packet.sklist_lock);

	/*
	 *	Unhook packet receive handler.
	 */

	if (po->running) {
		/*
		 *	Remove the protocol hook
		 */
		dev_remove_pack(&po->prot_hook);
		po->running = 0;
		po->num = 0;
		/* Drop the reference held on behalf of the hook. */
		__sock_put(sk);
	}

	packet_flush_mclist(sk);

#ifdef CONFIG_PACKET_MMAP
	/* A zeroed request tells packet_set_ring() to free the ring. */
	memset(&req, 0, sizeof(req));

	if (po->rx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 0);

	if (po->tx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 1);
#endif

	/*
	 *	Now the socket is dead. No more input will appear.
	 */

	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}
1327
1328/*
1329 * Attach a packet hook.
1330 */
1331
/*
 * (Re)bind a packet socket to @dev/@protocol.  Always returns 0; a
 * bind to a downed device is reported asynchronously via sk_err =
 * ENETDOWN rather than as a return value.  bind_lock is dropped around
 * dev_remove_pack() because that function may sleep.
 */
static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		/* Release the hook's reference before unregistering it. */
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	/* Protocol 0 means "bound but not listening": no hook installed. */
	if (protocol == 0)
		goto out_unlock;

	if (!dev || (dev->flags & IFF_UP)) {
		dev_add_pack(&po->prot_hook);
		/* The registered hook holds a reference on the socket. */
		sock_hold(sk);
		po->running = 1;
	} else {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}
1375
1376/*
1377 * Bind a packet socket to a device
1378 */
1379
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001380static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
1381 int addr_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001383 struct sock *sk = sock->sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001384 char name[15];
1385 struct net_device *dev;
1386 int err = -ENODEV;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001387
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388 /*
1389 * Check legality
1390 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001391
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001392 if (addr_len != sizeof(struct sockaddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393 return -EINVAL;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001394 strlcpy(name, uaddr->sa_data, sizeof(name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001395
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001396 dev = dev_get_by_name(sock_net(sk), name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001397 if (dev) {
1398 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
1399 dev_put(dev);
1400 }
1401 return err;
1402}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403
1404static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1405{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001406 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
1407 struct sock *sk = sock->sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408 struct net_device *dev = NULL;
1409 int err;
1410
1411
1412 /*
1413 * Check legality
1414 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001415
Linus Torvalds1da177e2005-04-16 15:20:36 -07001416 if (addr_len < sizeof(struct sockaddr_ll))
1417 return -EINVAL;
1418 if (sll->sll_family != AF_PACKET)
1419 return -EINVAL;
1420
1421 if (sll->sll_ifindex) {
1422 err = -ENODEV;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001423 dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001424 if (dev == NULL)
1425 goto out;
1426 }
1427 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
1428 if (dev)
1429 dev_put(dev);
1430
1431out:
1432 return err;
1433}
1434
/* Protocol descriptor used by sk_alloc() for every AF_PACKET socket. */
static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};
1440
1441/*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001442 * Create a packet of type SOCK_PACKET.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001443 */
1444
/*
 * Create an AF_PACKET socket (SOCK_RAW, SOCK_DGRAM or the legacy
 * SOCK_PACKET type).  Requires CAP_NET_RAW.  If @protocol is non-zero
 * the receive hook is registered immediately, so the socket starts
 * capturing before bind() is ever called.
 */
static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
	if (sk == NULL)
		goto out;

	/* Legacy SOCK_PACKET sockets get their own ops table. */
	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		/* The registered hook holds its own socket reference. */
		sock_hold(sk);
		po->running = 1;
	}

	write_lock_bh(&net->packet.sklist_lock);
	sk_add_node(sk, &net->packet.sklist);
	sock_prot_inuse_add(net, &packet_proto, 1);
	write_unlock_bh(&net->packet.sklist_lock);
	return 0;
out:
	return err;
}
1507
1508/*
1509 * Pull a packet from our receive queue and hand it to the user.
1510 * If necessary we block.
1511 */
1512
1513static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1514 struct msghdr *msg, size_t len, int flags)
1515{
1516 struct sock *sk = sock->sk;
1517 struct sk_buff *skb;
1518 int copied, err;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001519 struct sockaddr_ll *sll;
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001520 int vnet_hdr_len = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001521
1522 err = -EINVAL;
1523 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1524 goto out;
1525
1526#if 0
1527 /* What error should we return now? EUNATTACH? */
1528 if (pkt_sk(sk)->ifindex < 0)
1529 return -ENODEV;
1530#endif
1531
1532 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001533 * Call the generic datagram receiver. This handles all sorts
1534 * of horrible races and re-entrancy so we can forget about it
1535 * in the protocol layers.
1536 *
1537 * Now it will return ENETDOWN, if device have just gone down,
1538 * but then it will block.
1539 */
1540
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001541 skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001542
1543 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001544 * An error occurred so return it. Because skb_recv_datagram()
Linus Torvalds1da177e2005-04-16 15:20:36 -07001545 * handles the blocking we don't see and worry about blocking
1546 * retries.
1547 */
1548
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001549 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001550 goto out;
1551
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001552 if (pkt_sk(sk)->has_vnet_hdr) {
1553 struct virtio_net_hdr vnet_hdr = { 0 };
1554
1555 err = -EINVAL;
1556 vnet_hdr_len = sizeof(vnet_hdr);
1557 if ((len -= vnet_hdr_len) < 0)
1558 goto out_free;
1559
1560 if (skb_is_gso(skb)) {
1561 struct skb_shared_info *sinfo = skb_shinfo(skb);
1562
1563 /* This is a hint as to how much should be linear. */
1564 vnet_hdr.hdr_len = skb_headlen(skb);
1565 vnet_hdr.gso_size = sinfo->gso_size;
1566 if (sinfo->gso_type & SKB_GSO_TCPV4)
1567 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1568 else if (sinfo->gso_type & SKB_GSO_TCPV6)
1569 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1570 else if (sinfo->gso_type & SKB_GSO_UDP)
1571 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
1572 else if (sinfo->gso_type & SKB_GSO_FCOE)
1573 goto out_free;
1574 else
1575 BUG();
1576 if (sinfo->gso_type & SKB_GSO_TCP_ECN)
1577 vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1578 } else
1579 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
1580
1581 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1582 vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1583 vnet_hdr.csum_start = skb->csum_start -
1584 skb_headroom(skb);
1585 vnet_hdr.csum_offset = skb->csum_offset;
1586 } /* else everything is zero */
1587
1588 err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
1589 vnet_hdr_len);
1590 if (err < 0)
1591 goto out_free;
1592 }
1593
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594 /*
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001595 * If the address length field is there to be filled in, we fill
1596 * it in now.
1597 */
1598
Herbert Xuffbc6112007-02-04 23:33:10 -08001599 sll = &PACKET_SKB_CB(skb)->sa.ll;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001600 if (sock->type == SOCK_PACKET)
1601 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1602 else
1603 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1604
1605 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606 * You lose any data beyond the buffer you gave. If it worries a
1607 * user program they can ask the device for its MTU anyway.
1608 */
1609
1610 copied = skb->len;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001611 if (copied > len) {
1612 copied = len;
1613 msg->msg_flags |= MSG_TRUNC;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001614 }
1615
1616 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1617 if (err)
1618 goto out_free;
1619
Neil Horman3b885782009-10-12 13:26:31 -07001620 sock_recv_ts_and_drops(msg, sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621
1622 if (msg->msg_name)
Herbert Xuffbc6112007-02-04 23:33:10 -08001623 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1624 msg->msg_namelen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001625
Herbert Xu8dc41942007-02-04 23:31:32 -08001626 if (pkt_sk(sk)->auxdata) {
Herbert Xuffbc6112007-02-04 23:33:10 -08001627 struct tpacket_auxdata aux;
1628
1629 aux.tp_status = TP_STATUS_USER;
1630 if (skb->ip_summed == CHECKSUM_PARTIAL)
1631 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1632 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1633 aux.tp_snaplen = skb->len;
1634 aux.tp_mac = 0;
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -03001635 aux.tp_net = skb_network_offset(skb);
Eric Dumazet05423b22009-10-26 18:40:35 -07001636 aux.tp_vlan_tci = vlan_tx_tag_get(skb);
Herbert Xuffbc6112007-02-04 23:33:10 -08001637
1638 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
Herbert Xu8dc41942007-02-04 23:31:32 -08001639 }
1640
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641 /*
1642 * Free or return the buffer as appropriate. Again this
1643 * hides all the races and re-entrancy issues from us.
1644 */
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001645 err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001646
1647out_free:
1648 skb_free_datagram(sk, skb);
1649out:
1650 return err;
1651}
1652
Linus Torvalds1da177e2005-04-16 15:20:36 -07001653static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1654 int *uaddr_len, int peer)
1655{
1656 struct net_device *dev;
1657 struct sock *sk = sock->sk;
1658
1659 if (peer)
1660 return -EOPNOTSUPP;
1661
1662 uaddr->sa_family = AF_PACKET;
Eric Dumazet654d1f82009-11-02 10:43:32 +01001663 rcu_read_lock();
1664 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
1665 if (dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001666 strlcpy(uaddr->sa_data, dev->name, 15);
Eric Dumazet654d1f82009-11-02 10:43:32 +01001667 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668 memset(uaddr->sa_data, 0, 14);
Eric Dumazet654d1f82009-11-02 10:43:32 +01001669 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670 *uaddr_len = sizeof(*uaddr);
1671
1672 return 0;
1673}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674
1675static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1676 int *uaddr_len, int peer)
1677{
1678 struct net_device *dev;
1679 struct sock *sk = sock->sk;
1680 struct packet_sock *po = pkt_sk(sk);
Cyrill Gorcunov13cfa972009-11-08 05:51:19 +00001681 DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682
1683 if (peer)
1684 return -EOPNOTSUPP;
1685
1686 sll->sll_family = AF_PACKET;
1687 sll->sll_ifindex = po->ifindex;
1688 sll->sll_protocol = po->num;
Eric Dumazet654d1f82009-11-02 10:43:32 +01001689 rcu_read_lock();
1690 dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691 if (dev) {
1692 sll->sll_hatype = dev->type;
1693 sll->sll_halen = dev->addr_len;
1694 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695 } else {
1696 sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
1697 sll->sll_halen = 0;
1698 }
Eric Dumazet654d1f82009-11-02 10:43:32 +01001699 rcu_read_unlock();
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001700 *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701
1702 return 0;
1703}
1704
Wang Chen2aeb0b82008-07-14 20:49:46 -07001705static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1706 int what)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707{
1708 switch (i->type) {
1709 case PACKET_MR_MULTICAST:
1710 if (what > 0)
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001711 return dev_mc_add(dev, i->addr, i->alen, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712 else
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001713 return dev_mc_delete(dev, i->addr, i->alen, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714 break;
1715 case PACKET_MR_PROMISC:
Wang Chen2aeb0b82008-07-14 20:49:46 -07001716 return dev_set_promiscuity(dev, what);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001717 break;
1718 case PACKET_MR_ALLMULTI:
Wang Chen2aeb0b82008-07-14 20:49:46 -07001719 return dev_set_allmulti(dev, what);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720 break;
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001721 case PACKET_MR_UNICAST:
1722 if (what > 0)
Jiri Pirkoccffad22009-05-22 23:22:17 +00001723 return dev_unicast_add(dev, i->addr);
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001724 else
Jiri Pirkoccffad22009-05-22 23:22:17 +00001725 return dev_unicast_delete(dev, i->addr);
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001726 break;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001727 default:
1728 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001729 }
Wang Chen2aeb0b82008-07-14 20:49:46 -07001730 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731}
1732
1733static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1734{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001735 for ( ; i; i = i->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736 if (i->ifindex == dev->ifindex)
1737 packet_dev_mc(dev, i, what);
1738 }
1739}
1740
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001741static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001742{
1743 struct packet_sock *po = pkt_sk(sk);
1744 struct packet_mclist *ml, *i;
1745 struct net_device *dev;
1746 int err;
1747
1748 rtnl_lock();
1749
1750 err = -ENODEV;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001751 dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001752 if (!dev)
1753 goto done;
1754
1755 err = -EINVAL;
1756 if (mreq->mr_alen > dev->addr_len)
1757 goto done;
1758
1759 err = -ENOBUFS;
Kris Katterjohn8b3a7002006-01-11 15:56:43 -08001760 i = kmalloc(sizeof(*i), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761 if (i == NULL)
1762 goto done;
1763
1764 err = 0;
1765 for (ml = po->mclist; ml; ml = ml->next) {
1766 if (ml->ifindex == mreq->mr_ifindex &&
1767 ml->type == mreq->mr_type &&
1768 ml->alen == mreq->mr_alen &&
1769 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1770 ml->count++;
1771 /* Free the new element ... */
1772 kfree(i);
1773 goto done;
1774 }
1775 }
1776
1777 i->type = mreq->mr_type;
1778 i->ifindex = mreq->mr_ifindex;
1779 i->alen = mreq->mr_alen;
1780 memcpy(i->addr, mreq->mr_address, i->alen);
1781 i->count = 1;
1782 i->next = po->mclist;
1783 po->mclist = i;
Wang Chen2aeb0b82008-07-14 20:49:46 -07001784 err = packet_dev_mc(dev, i, 1);
1785 if (err) {
1786 po->mclist = i->next;
1787 kfree(i);
1788 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789
1790done:
1791 rtnl_unlock();
1792 return err;
1793}
1794
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001795static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001796{
1797 struct packet_mclist *ml, **mlp;
1798
1799 rtnl_lock();
1800
1801 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1802 if (ml->ifindex == mreq->mr_ifindex &&
1803 ml->type == mreq->mr_type &&
1804 ml->alen == mreq->mr_alen &&
1805 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1806 if (--ml->count == 0) {
1807 struct net_device *dev;
1808 *mlp = ml->next;
Eric Dumazetad959e72009-10-16 06:38:46 +00001809 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
1810 if (dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811 packet_dev_mc(dev, ml, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001812 kfree(ml);
1813 }
1814 rtnl_unlock();
1815 return 0;
1816 }
1817 }
1818 rtnl_unlock();
1819 return -EADDRNOTAVAIL;
1820}
1821
1822static void packet_flush_mclist(struct sock *sk)
1823{
1824 struct packet_sock *po = pkt_sk(sk);
1825 struct packet_mclist *ml;
1826
1827 if (!po->mclist)
1828 return;
1829
1830 rtnl_lock();
1831 while ((ml = po->mclist) != NULL) {
1832 struct net_device *dev;
1833
1834 po->mclist = ml->next;
Eric Dumazetad959e72009-10-16 06:38:46 +00001835 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
1836 if (dev != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001837 packet_dev_mc(dev, ml, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001838 kfree(ml);
1839 }
1840 rtnl_unlock();
1841}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001842
/*
 * setsockopt(SOL_PACKET) handler.  Each option copies its value in
 * from userspace, validates it, and updates the packet_sock.  The
 * ring-buffer related options only exist under CONFIG_PACKET_MMAP.
 * NOTE(review): some options use "optlen < sizeof(val)" and others
 * "optlen != sizeof(val)" — this asymmetry is userspace-visible ABI,
 * so it must not be "tidied up".
 */
static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch (optname) {
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		/* Accept both the old packet_mreq and the larger
		 * packet_mreq_max layout; zero-fill the tail so short
		 * copies leave no stale bytes. */
		struct packet_mreq_max mreq;
		int len = optlen;
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq, optval, len))
			return -EFAULT;
		/* The declared address length must fit in what was copied. */
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}

#ifdef CONFIG_PACKET_MMAP
	case PACKET_RX_RING:
	case PACKET_TX_RING:
	{
		struct tpacket_req req;

		if (optlen < sizeof(req))
			return -EINVAL;
		/* Rings and virtio-net headers are mutually exclusive. */
		if (pkt_sk(sk)->has_vnet_hdr)
			return -EINVAL;
		if (copy_from_user(&req, optval, sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
	case PACKET_VERSION:
	{
		/* Frame header layout; cannot change while a ring exists. */
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
		case TPACKET_V2:
			po->tp_version = val;
			return 0;
		default:
			return -EINVAL;
		}
	}
	case PACKET_RESERVE:
	{
		/* Headroom reserved in each ring frame; ring must not exist. */
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_reserve = val;
		return 0;
	}
	case PACKET_LOSS:
	{
		/* TX ring: discard-on-error policy flag. */
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_loss = !!val;
		return 0;
	}
#endif
	case PACKET_AUXDATA:
	{
		/* Deliver tpacket_auxdata as a cmsg on recvmsg(). */
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	case PACKET_ORIGDEV:
	{
		/* Report the original (pre-bond/bridge) ingress device. */
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->origdev = !!val;
		return 0;
	}
	case PACKET_VNET_HDR:
	{
		/* Prepend/accept a virtio_net_hdr; SOCK_RAW only, no rings. */
		int val;

		if (sock->type != SOCK_RAW)
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->has_vnet_hdr = !!val;
		return 0;
	}
	default:
		return -ENOPROTOOPT;
	}
}
1991
/*
 * getsockopt(SOL_PACKET) handler.  Builds the option value in a local
 * (either the tpacket_stats snapshot or the scalar 'val'), points
 * 'data' at it, then copies min(len, sizeof value) back to userspace
 * along with the clamped length.
 */
static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	int val;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	void *data;
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case PACKET_STATISTICS:
		/* Snapshot and reset counters atomically w.r.t. receive. */
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		spin_lock_bh(&sk->sk_receive_queue.lock);
		st = po->stats;
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		/* tp_packets reports total seen, including drops. */
		st.tp_packets += st.tp_drops;

		data = &st;
		break;
	case PACKET_AUXDATA:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->auxdata;

		data = &val;
		break;
	case PACKET_ORIGDEV:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->origdev;

		data = &val;
		break;
	case PACKET_VNET_HDR:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->has_vnet_hdr;

		data = &val;
		break;
#ifdef CONFIG_PACKET_MMAP
	case PACKET_VERSION:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_version;
		data = &val;
		break;
	case PACKET_HDRLEN:
		/* Input/output option: caller passes the TPACKET version in,
		 * gets the corresponding frame-header length back.
		 * NOTE(review): if len < sizeof(int) only part of 'val' is
		 * read from userspace, so the switch sees partly
		 * uninitialized data — harmless (falls to -EINVAL for
		 * garbage) but worth confirming against later upstream. */
		if (len > sizeof(int))
			len = sizeof(int);
		if (copy_from_user(&val, optval, len))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
			val = sizeof(struct tpacket_hdr);
			break;
		case TPACKET_V2:
			val = sizeof(struct tpacket2_hdr);
			break;
		default:
			return -EINVAL;
		}
		data = &val;
		break;
	case PACKET_RESERVE:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_reserve;
		data = &val;
		break;
	case PACKET_LOSS:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_loss;
		data = &val;
		break;
#endif
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
		return -EFAULT;
	return 0;
}
2091
2092
/*
 * Netdevice notifier: keep every packet socket in this netns in sync
 * with device state.  On UNREGISTER, memberships are deprogrammed and
 * the socket fully unbound; on DOWN the protocol hook is removed but
 * the binding (ifindex) is kept so UP can re-attach it.
 */
static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = data;
	struct net *net = dev_net(dev);

	read_lock(&net->packet.sklist_lock);
	sk_for_each(sk, node, &net->packet.sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */

		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					/* Detach the hook and drop the ref it
					 * held; wake the owner with ENETDOWN. */
					__dev_remove_pack(&po->prot_hook);
					__sock_put(sk);
					po->running = 0;
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					/* Device is going away for good:
					 * forget the binding entirely. */
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			spin_lock(&po->bind_lock);
			if (dev->ifindex == po->ifindex && po->num &&
			    !po->running) {
				/* Re-attach a bound-but-stopped socket. */
				dev_add_pack(&po->prot_hook);
				sock_hold(sk);
				po->running = 1;
			}
			spin_unlock(&po->bind_lock);
			break;
		}
	}
	read_unlock(&net->packet.sklist_lock);
	return NOTIFY_DONE;
}
2143
2144
/*
 * ioctl() handler for packet sockets: queue-occupancy queries and
 * timestamps are handled here; routing/ARP/interface ioctls are
 * forwarded to the inet layer (init_net only); anything else is
 * -ENOIOCTLCMD so the caller can fall back to generic handling.
 */
static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCOUTQ:
	{
		/* Bytes queued for transmit. */
		int amount = sk_wmem_alloc_get(sk);

		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		/* Size of the next packet waiting to be received (0 if none). */
		struct sk_buff *skb;
		int amount = 0;

		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);

#ifdef CONFIG_INET
	case SIOCADDRT:
	case SIOCDELRT:
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
	case SIOCGIFADDR:
	case SIOCSIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		/* Only the initial namespace may reach the inet handlers. */
		if (!net_eq(sock_net(sk), &init_net))
			return -ENOIOCTLCMD;
		return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
	/* Not reached: every case above returns. */
	return 0;
}
2199
2200#ifndef CONFIG_PACKET_MMAP
2201#define packet_mmap sock_no_mmap
2202#define packet_poll datagram_poll
2203#else
2204
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002205static unsigned int packet_poll(struct file *file, struct socket *sock,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002206 poll_table *wait)
2207{
2208 struct sock *sk = sock->sk;
2209 struct packet_sock *po = pkt_sk(sk);
2210 unsigned int mask = datagram_poll(file, sock, wait);
2211
2212 spin_lock_bh(&sk->sk_receive_queue.lock);
Johann Baudy69e3c752009-05-18 22:11:22 -07002213 if (po->rx_ring.pg_vec) {
2214 if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002215 mask |= POLLIN | POLLRDNORM;
2216 }
2217 spin_unlock_bh(&sk->sk_receive_queue.lock);
Johann Baudy69e3c752009-05-18 22:11:22 -07002218 spin_lock_bh(&sk->sk_write_queue.lock);
2219 if (po->tx_ring.pg_vec) {
2220 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
2221 mask |= POLLOUT | POLLWRNORM;
2222 }
2223 spin_unlock_bh(&sk->sk_write_queue.lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002224 return mask;
2225}
2226
2227
2228/* Dirty? Well, I still did not learn better way to account
2229 * for user mmaps.
2230 */
2231
2232static void packet_mm_open(struct vm_area_struct *vma)
2233{
2234 struct file *file = vma->vm_file;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002235 struct socket *sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002236 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002237
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238 if (sk)
2239 atomic_inc(&pkt_sk(sk)->mapped);
2240}
2241
2242static void packet_mm_close(struct vm_area_struct *vma)
2243{
2244 struct file *file = vma->vm_file;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002245 struct socket *sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002246 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002247
Linus Torvalds1da177e2005-04-16 15:20:36 -07002248 if (sk)
2249 atomic_dec(&pkt_sk(sk)->mapped);
2250}
2251
/* vm_ops for ring mappings: track the map count via open/close so
 * packet_set_ring() can refuse to tear down a ring that is mapped. */
static const struct vm_operations_struct packet_mmap_ops = {
	.open = packet_mm_open,
	.close = packet_mm_close,
};
2256
/*
 * Release a page vector: free each allocated block (each a 2^order
 * page allocation) and then the pointer array itself.  NULL slots are
 * tolerated so a partially-built vector can be torn down.
 */
static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
	unsigned int idx;

	for (idx = 0; idx < len; idx++) {
		if (likely(pg_vec[idx]))
			free_pages((unsigned long) pg_vec[idx], order);
	}
	kfree(pg_vec);
}
2267
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002268static inline char *alloc_one_pg_vec_page(unsigned long order)
2269{
Eric Dumazet719bfea2009-04-15 03:39:52 -07002270 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
2271
2272 return (char *) __get_free_pages(gfp_flags, order);
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002273}
2274
2275static char **alloc_pg_vec(struct tpacket_req *req, int order)
2276{
2277 unsigned int block_nr = req->tp_block_nr;
2278 char **pg_vec;
2279 int i;
2280
2281 pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
2282 if (unlikely(!pg_vec))
2283 goto out;
2284
2285 for (i = 0; i < block_nr; i++) {
2286 pg_vec[i] = alloc_one_pg_vec_page(order);
2287 if (unlikely(!pg_vec[i]))
2288 goto out_free_pgvec;
2289 }
2290
2291out:
2292 return pg_vec;
2293
2294out_free_pgvec:
2295 free_pg_vec(pg_vec, order, block_nr);
2296 pg_vec = NULL;
2297 goto out;
2298}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002299
/*
 * Create or destroy an RX or TX ring for the socket.
 * @closing: nonzero when called from release, which skips the
 *           mapped/pending busy checks.
 * @tx_ring: selects po->tx_ring vs po->rx_ring.
 *
 * Sequence: validate the geometry and allocate pages, detach the
 * protocol hook (so no packets arrive mid-swap), swap the ring buffers
 * under pg_vec_lock, then re-attach the hook and free whichever page
 * vector is no longer in use.
 */
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
		int closing, int tx_ring)
{
	char **pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	struct packet_ring_buffer *rb;
	struct sk_buff_head *rb_queue;
	__be16 num;
	int err;

	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;

	/* Refuse while the ring is mmapped or TX frames are in flight. */
	err = -EBUSY;
	if (!closing) {
		if (atomic_read(&po->mapped))
			goto out;
		if (atomic_read(&rb->pending))
			goto out;
	}

	if (req->tp_block_nr) {
		/* Sanity tests and some calculations */
		err = -EBUSY;
		if (unlikely(rb->pg_vec))
			goto out;

		switch (po->tp_version) {
		case TPACKET_V1:
			po->tp_hdrlen = TPACKET_HDRLEN;
			break;
		case TPACKET_V2:
			po->tp_hdrlen = TPACKET2_HDRLEN;
			break;
		}

		/* Blocks must be positive, page-aligned multiples; frames
		 * must hold at least header+reserve and be TPACKET-aligned,
		 * and frames must tile the blocks exactly. */
		err = -EINVAL;
		if (unlikely((int)req->tp_block_size <= 0))
			goto out;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			goto out;
		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
					po->tp_reserve))
			goto out;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			goto out;

		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (unlikely(rb->frames_per_block <= 0))
			goto out;
		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
					req->tp_frame_nr))
			goto out;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;
	}
	/* Done */
	else {
		/* Destroying: a zero-block request must carry zero frames. */
		err = -EINVAL;
		if (unlikely(req->tp_frame_nr))
			goto out;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
		po->running = 0;
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	/* Wait for in-flight receive paths to drain before swapping. */
	synchronize_net();

	err = -EBUSY;
	mutex_lock(&po->pg_vec_lock);
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
		/* XC: exchange a and b, yielding the old value of a. */
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
		spin_lock_bh(&rb_queue->lock);
		pg_vec = XC(rb->pg_vec, pg_vec);
		rb->frame_max = (req->tp_frame_nr - 1);
		rb->head = 0;
		rb->frame_size = req->tp_frame_size;
		spin_unlock_bh(&rb_queue->lock);

		order = XC(rb->pg_vec_order, order);
		req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);

		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		/* Receive path switches with the RX ring's existence. */
		po->prot_hook.func = (po->rx_ring.pg_vec) ?
						tpacket_rcv : packet_rcv;
		skb_queue_purge(rb_queue);
#undef XC
		if (atomic_read(&po->mapped))
			pr_err("packet_mmap: vma is busy: %d\n",
			       atomic_read(&po->mapped));
	}
	mutex_unlock(&po->pg_vec_lock);

	/* Re-attach the protocol hook if we detached it above. */
	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		sock_hold(sk);
		po->running = 1;
		po->num = num;
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	/* After XC this is the replaced (old) vector — or the new one if
	 * the swap was refused. */
	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}
2426
/*
 * mmap() both rings (RX then TX, whichever exist) as one contiguous
 * mapping.  The VMA must start at offset 0 and cover exactly the sum
 * of both rings' sizes; every ring page is inserted individually with
 * vm_insert_page().
 */
static int packet_mmap(struct file *file, struct socket *sock,
		struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	mutex_lock(&po->pg_vec_lock);

	/* Total bytes the caller must map: both rings back to back. */
	expected_size = 0;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec) {
			expected_size += rb->pg_vec_len
						* rb->pg_vec_pages
						* PAGE_SIZE;
		}
	}

	if (expected_size == 0)
		goto out;

	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)
		goto out;

	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)
			continue;

		for (i = 0; i < rb->pg_vec_len; i++) {
			struct page *page = virt_to_page(rb->pg_vec[i]);
			int pg_num;

			/* Blocks are compound allocations: map each of the
			 * pg_vec_pages constituent pages in order. */
			for (pg_num = 0; pg_num < rb->pg_vec_pages;
					pg_num++, page++) {
				err = vm_insert_page(vma, start, page);
				if (unlikely(err))
					goto out;
				start += PAGE_SIZE;
			}
		}
	}

	/* Mark the rings mapped so packet_set_ring() refuses teardown. */
	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
2486#endif
2487
2488
/* proto_ops for the legacy SOCK_PACKET socket type: no mmap/poll ring
 * support and no socket options — only send/recv/bind/getname plus the
 * shared release/ioctl handlers. */
static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002509
/* proto_ops for modern AF_PACKET sockets (SOCK_RAW/SOCK_DGRAM): full
 * sockopt support plus the ring-aware poll and mmap handlers. */
static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};
2530
/* PF_PACKET family registration: routes socket(AF_PACKET, ...) calls
 * to packet_create(). */
static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner =	THIS_MODULE,
};
2536
/* Netdevice event hook: packet_notifier() reacts to interfaces going up/down. */
static struct notifier_block packet_netdev_notifier = {
	.notifier_call = packet_notifier,
};
2540
2541#ifdef CONFIG_PROC_FS
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002542static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543{
2544 struct sock *s;
2545 struct hlist_node *node;
2546
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002547 sk_for_each(s, node, &net->packet.sklist) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548 if (!off--)
2549 return s;
2550 }
2551 return NULL;
2552}
2553
2554static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
Eric Dumazet40ccbf52008-01-07 22:39:57 -08002555 __acquires(seq_file_net(seq)->packet.sklist_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002556{
Denis V. Luneve372c4142007-11-19 22:31:54 -08002557 struct net *net = seq_file_net(seq);
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002558 read_lock(&net->packet.sklist_lock);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002559 return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002560}
2561
2562static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2563{
Herbert Xu1bf40952007-12-16 14:04:02 -08002564 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002565 ++*pos;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002566 return (v == SEQ_START_TOKEN)
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002567 ? sk_head(&net->packet.sklist)
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002568 : sk_next((struct sock *)v) ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002569}
2570
2571static void packet_seq_stop(struct seq_file *seq, void *v)
Eric Dumazet40ccbf52008-01-07 22:39:57 -08002572 __releases(seq_file_net(seq)->packet.sklist_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002573{
Herbert Xu1bf40952007-12-16 14:04:02 -08002574 struct net *net = seq_file_net(seq);
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002575 read_unlock(&net->packet.sklist_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002576}
2577
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002578static int packet_seq_show(struct seq_file *seq, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002579{
2580 if (v == SEQ_START_TOKEN)
2581 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
2582 else {
2583 struct sock *s = v;
2584 const struct packet_sock *po = pkt_sk(s);
2585
2586 seq_printf(seq,
2587 "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
2588 s,
2589 atomic_read(&s->sk_refcnt),
2590 s->sk_type,
2591 ntohs(po->num),
2592 po->ifindex,
2593 po->running,
2594 atomic_read(&s->sk_rmem_alloc),
2595 sock_i_uid(s),
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002596 sock_i_ino(s));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002597 }
2598
2599 return 0;
2600}
2601
Philippe De Muyter56b3d972007-07-10 23:07:31 -07002602static const struct seq_operations packet_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002603 .start = packet_seq_start,
2604 .next = packet_seq_next,
2605 .stop = packet_seq_stop,
2606 .show = packet_seq_show,
2607};
2608
/*
 * open() handler for /proc/net/packet: namespace-aware seq_file setup;
 * seq_net_private keeps the struct net pointer for seq_file_net().
 */
static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}
2614
Arjan van de Venda7071d2007-02-12 00:55:36 -08002615static const struct file_operations packet_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002616 .owner = THIS_MODULE,
2617 .open = packet_seq_open,
2618 .read = seq_read,
2619 .llseek = seq_lseek,
Denis V. Luneve372c4142007-11-19 22:31:54 -08002620 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002621};
2622
2623#endif
2624
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002625static int __net_init packet_net_init(struct net *net)
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002626{
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002627 rwlock_init(&net->packet.sklist_lock);
2628 INIT_HLIST_HEAD(&net->packet.sklist);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002629
2630 if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
2631 return -ENOMEM;
2632
2633 return 0;
2634}
2635
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002636static void __net_exit packet_net_exit(struct net *net)
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002637{
2638 proc_net_remove(net, "packet");
2639}
2640
/* Hooks run for every network namespace creation/destruction. */
static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};
2645
2646
Linus Torvalds1da177e2005-04-16 15:20:36 -07002647static void __exit packet_exit(void)
2648{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002649 unregister_netdevice_notifier(&packet_netdev_notifier);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002650 unregister_pernet_subsys(&packet_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002651 sock_unregister(PF_PACKET);
2652 proto_unregister(&packet_proto);
2653}
2654
2655static int __init packet_init(void)
2656{
2657 int rc = proto_register(&packet_proto, 0);
2658
2659 if (rc != 0)
2660 goto out;
2661
2662 sock_register(&packet_family_ops);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002663 register_pernet_subsys(&packet_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002664 register_netdevice_notifier(&packet_netdev_notifier);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002665out:
2666 return rc;
2667}
2668
2669module_init(packet_init);
2670module_exit(packet_exit);
2671MODULE_LICENSE("GPL");
2672MODULE_ALIAS_NETPROTO(PF_PACKET);