blob: 4065129345077b15a30cdf6d989343f550ad30fa [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Joe Perchesafd465032012-03-12 07:03:32 +000013#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
Randy Dunlap4fc268d2006-01-11 12:17:47 -080015#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/module.h>
17#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/kernel.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090019#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <asm/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
27#include <linux/mroute.h>
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
Herbert Xue1a80002008-10-09 12:00:17 -070033#include <linux/etherdevice.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080034#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <net/sock.h>
37#include <net/ip.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
40#include <net/ipip.h>
41#include <net/arp.h>
42#include <net/checksum.h>
43#include <net/dsfield.h>
44#include <net/inet_ecn.h>
45#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070046#include <net/net_namespace.h>
47#include <net/netns/generic.h>
Herbert Xuc19e6542008-10-09 11:59:55 -070048#include <net/rtnetlink.h>
Dmitry Kozlov00959ad2010-08-21 23:05:39 -070049#include <net/gre.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
Eric Dumazetdfd56b82011-12-10 09:48:31 +000051#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <net/ipv6.h>
53#include <net/ip6_fib.h>
54#include <net/ip6_route.h>
55#endif
56
57/*
58 Problems & solutions
59 --------------------
60
61 1. The most important issue is detecting local dead loops.
62 They would cause complete host lockup in transmit, which
63 would be "resolved" by stack overflow or, if queueing is enabled,
64 with infinite looping in net_bh.
65
66 We cannot track such dead loops during route installation,
67 it is infeasible task. The most general solutions would be
68 to keep skb->encapsulation counter (sort of local ttl),
Eric Dumazet6d0722a2010-09-29 23:35:10 -070069 and silently drop packet when it expires. It is a good
stephen hemmingerbff52852012-02-24 08:08:20 +000070 solution, but it supposes maintaining new variable in ALL
Linus Torvalds1da177e2005-04-16 15:20:36 -070071 skb, even if no tunneling is used.
72
Eric Dumazet6d0722a2010-09-29 23:35:10 -070073 Current solution: xmit_recursion breaks dead loops. This is a percpu
74 counter, since when we enter the first ndo_xmit(), cpu migration is
75 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
77 2. Networking dead loops would not kill routers, but would really
78 kill network. IP hop limit plays role of "t->recursion" in this case,
79 if we copy it from packet being encapsulated to upper header.
80 It is very good solution, but it introduces two problems:
81
82 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
83 do not work over tunnels.
84 - traceroute does not work. I planned to relay ICMP from tunnel,
85 so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
87 only Linux complies to rfc1812 now (yes, guys, Linux is the only
88 true router now :-)), all routers (at least, in neighbourhood of mine)
89 return only 8 bytes of payload. It is the end.
90
91 Hence, if we want that OSPF worked or traceroute said something reasonable,
92 we should search for another solution.
93
94 One of them is to parse packet trying to detect inner encapsulation
95 made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, ttl is not a solution at all.
Linus Torvalds1da177e2005-04-16 15:20:36 -070097
98 Current solution: The solution was UNEXPECTEDLY SIMPLE.
99 We force DF flag on tunnels with preconfigured hop limit,
100 that is ALL. :-) Well, it does not remove the problem completely,
101 but exponential growth of network traffic is changed to linear
102 (branches, that exceed pmtu are pruned) and tunnel mtu
stephen hemmingerbff52852012-02-24 08:08:20 +0000103 rapidly degrades to value <68, where looping stops.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104 Yes, it is not good if there exists a router in the loop,
105 which does not force DF, even when encapsulating packets have DF set.
106 But it is not our problem! Nobody could accuse us, we made
107 all that we could make. Even if it is your gated who injected
108 fatal route to network, even if it were you who configured
109 fatal static route: you are innocent. :-)
110
111
112
113 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
114 practically identical code. It would be good to glue them
115 together, but it is not very evident, how to make them modular.
116 sit is integral part of IPv6, ipip and gre are naturally modular.
117 We could extract common parts (hash table, ioctl etc)
118 to a separate module (ip_tunnel.c).
119
120 Alexey Kuznetsov.
121 */
122
stephen hemmingereccc1bb2012-09-25 11:02:48 +0000123static bool log_ecn_error = true;
124module_param(log_ecn_error, bool, 0644);
125MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126
Herbert Xuc19e6542008-10-09 11:59:55 -0700127static struct rtnl_link_ops ipgre_link_ops __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128static int ipgre_tunnel_init(struct net_device *dev);
129static void ipgre_tunnel_setup(struct net_device *dev);
Herbert Xu42aa9162008-10-09 11:59:32 -0700130static int ipgre_tunnel_bind_dev(struct net_device *dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131
132/* Fallback tunnel: no source, no destination, no key, no options */
133
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700134#define HASH_SIZE 16
135
Eric Dumazetf99189b2009-11-17 10:42:49 +0000136static int ipgre_net_id __read_mostly;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700137struct ipgre_net {
Eric Dumazet15078502010-09-15 11:07:53 +0000138 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700139
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700140 struct net_device *fb_tunnel_dev;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700141};
142
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143/* Tunnel hash table */
144
145/*
146 4 hash tables:
147
148 3: (remote,local)
149 2: (remote,*)
150 1: (*,local)
151 0: (*,*)
152
153 We require exact key match i.e. if a key is present in packet
154 it will match only tunnel with the same key; if it is not present,
155 it will match only keyless tunnel.
156
   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
159 */
160
/*
 * Fold an address or key (any 32-bit big-endian value) into a bucket index.
 * The argument is parenthesized so that arbitrary expressions are safe, and
 * the mask follows HASH_SIZE, which must therefore remain a power of two.
 */
#define HASH(addr) (((__force u32)(addr) ^ ((__force u32)(addr) >> 4)) & (HASH_SIZE - 1))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700163#define tunnels_r_l tunnels[3]
164#define tunnels_r tunnels[2]
165#define tunnels_l tunnels[1]
166#define tunnels_wc tunnels[0]
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167
stephen hemminger87b6d212012-04-12 06:31:16 +0000168static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
169 struct rtnl_link_stats64 *tot)
Eric Dumazete985aad2010-09-27 03:57:11 +0000170{
Eric Dumazete985aad2010-09-27 03:57:11 +0000171 int i;
172
173 for_each_possible_cpu(i) {
174 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
stephen hemminger87b6d212012-04-12 06:31:16 +0000175 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
176 unsigned int start;
Eric Dumazete985aad2010-09-27 03:57:11 +0000177
stephen hemminger87b6d212012-04-12 06:31:16 +0000178 do {
179 start = u64_stats_fetch_begin_bh(&tstats->syncp);
180 rx_packets = tstats->rx_packets;
181 tx_packets = tstats->tx_packets;
182 rx_bytes = tstats->rx_bytes;
183 tx_bytes = tstats->tx_bytes;
184 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
185
186 tot->rx_packets += rx_packets;
187 tot->tx_packets += tx_packets;
188 tot->rx_bytes += rx_bytes;
189 tot->tx_bytes += tx_bytes;
Eric Dumazete985aad2010-09-27 03:57:11 +0000190 }
stephen hemminger87b6d212012-04-12 06:31:16 +0000191
192 tot->multicast = dev->stats.multicast;
193 tot->rx_crc_errors = dev->stats.rx_crc_errors;
194 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
195 tot->rx_length_errors = dev->stats.rx_length_errors;
stephen hemmingereccc1bb2012-09-25 11:02:48 +0000196 tot->rx_frame_errors = dev->stats.rx_frame_errors;
stephen hemminger87b6d212012-04-12 06:31:16 +0000197 tot->rx_errors = dev->stats.rx_errors;
stephen hemmingereccc1bb2012-09-25 11:02:48 +0000198
stephen hemminger87b6d212012-04-12 06:31:16 +0000199 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
200 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
201 tot->tx_dropped = dev->stats.tx_dropped;
202 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
203 tot->tx_errors = dev->stats.tx_errors;
204
205 return tot;
Eric Dumazete985aad2010-09-27 03:57:11 +0000206}
207
stephen hemmingerd2083282012-09-24 18:12:23 +0000208/* Does key in tunnel parameters match packet */
209static bool ipgre_key_match(const struct ip_tunnel_parm *p,
stephen hemminger9fbef052012-10-01 05:21:14 +0000210 __be16 flags, __be32 key)
stephen hemmingerd2083282012-09-24 18:12:23 +0000211{
212 if (p->i_flags & GRE_KEY) {
213 if (flags & GRE_KEY)
214 return key == p->i_key;
215 else
216 return false; /* key expected, none present */
217 } else
218 return !(flags & GRE_KEY);
219}
220
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221/* Given src, dst and key, find appropriate for input tunnel. */
222
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000223static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
224 __be32 remote, __be32 local,
stephen hemminger9fbef052012-10-01 05:21:14 +0000225 __be16 flags, __be32 key,
stephen hemmingerd2083282012-09-24 18:12:23 +0000226 __be16 gre_proto)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227{
Timo Teras749c10f2009-01-19 17:22:12 -0800228 struct net *net = dev_net(dev);
229 int link = dev->ifindex;
Eric Dumazet15078502010-09-15 11:07:53 +0000230 unsigned int h0 = HASH(remote);
231 unsigned int h1 = HASH(key);
Timo Terasafcf1242009-01-26 20:56:10 -0800232 struct ip_tunnel *t, *cand = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700233 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Herbert Xue1a80002008-10-09 12:00:17 -0700234 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
235 ARPHRD_ETHER : ARPHRD_IPGRE;
Timo Terasafcf1242009-01-26 20:56:10 -0800236 int score, cand_score = 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237
Amerigo Wange086cad2012-11-11 21:52:34 +0000238 for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800239 if (local != t->parms.iph.saddr ||
240 remote != t->parms.iph.daddr ||
Timo Teras749c10f2009-01-19 17:22:12 -0800241 !(t->dev->flags & IFF_UP))
242 continue;
243
stephen hemmingerd2083282012-09-24 18:12:23 +0000244 if (!ipgre_key_match(&t->parms, flags, key))
245 continue;
246
Timo Teras749c10f2009-01-19 17:22:12 -0800247 if (t->dev->type != ARPHRD_IPGRE &&
248 t->dev->type != dev_type)
249 continue;
250
Timo Terasafcf1242009-01-26 20:56:10 -0800251 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800252 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800253 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800254 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800255 score |= 2;
256 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800257 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800258
259 if (score < cand_score) {
260 cand = t;
261 cand_score = score;
262 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 }
Herbert Xue1a80002008-10-09 12:00:17 -0700264
Amerigo Wange086cad2012-11-11 21:52:34 +0000265 for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800266 if (remote != t->parms.iph.daddr ||
Timo Teras749c10f2009-01-19 17:22:12 -0800267 !(t->dev->flags & IFF_UP))
268 continue;
269
stephen hemmingerd2083282012-09-24 18:12:23 +0000270 if (!ipgre_key_match(&t->parms, flags, key))
271 continue;
272
Timo Teras749c10f2009-01-19 17:22:12 -0800273 if (t->dev->type != ARPHRD_IPGRE &&
274 t->dev->type != dev_type)
275 continue;
276
Timo Terasafcf1242009-01-26 20:56:10 -0800277 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800278 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800279 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800280 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800281 score |= 2;
282 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800283 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800284
285 if (score < cand_score) {
286 cand = t;
287 cand_score = score;
288 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 }
Herbert Xue1a80002008-10-09 12:00:17 -0700290
Amerigo Wange086cad2012-11-11 21:52:34 +0000291 for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800292 if ((local != t->parms.iph.saddr &&
293 (local != t->parms.iph.daddr ||
294 !ipv4_is_multicast(local))) ||
Timo Teras749c10f2009-01-19 17:22:12 -0800295 !(t->dev->flags & IFF_UP))
296 continue;
297
stephen hemmingerd2083282012-09-24 18:12:23 +0000298 if (!ipgre_key_match(&t->parms, flags, key))
299 continue;
300
Timo Teras749c10f2009-01-19 17:22:12 -0800301 if (t->dev->type != ARPHRD_IPGRE &&
302 t->dev->type != dev_type)
303 continue;
304
Timo Terasafcf1242009-01-26 20:56:10 -0800305 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800306 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800307 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800308 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800309 score |= 2;
310 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800311 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800312
313 if (score < cand_score) {
314 cand = t;
315 cand_score = score;
316 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317 }
Herbert Xue1a80002008-10-09 12:00:17 -0700318
Amerigo Wange086cad2012-11-11 21:52:34 +0000319 for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800320 if (t->parms.i_key != key ||
321 !(t->dev->flags & IFF_UP))
322 continue;
323
324 if (t->dev->type != ARPHRD_IPGRE &&
325 t->dev->type != dev_type)
326 continue;
327
Timo Terasafcf1242009-01-26 20:56:10 -0800328 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800329 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800330 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800331 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800332 score |= 2;
333 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800334 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800335
336 if (score < cand_score) {
337 cand = t;
338 cand_score = score;
339 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340 }
341
Timo Terasafcf1242009-01-26 20:56:10 -0800342 if (cand != NULL)
343 return cand;
Herbert Xue1a80002008-10-09 12:00:17 -0700344
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000345 dev = ign->fb_tunnel_dev;
346 if (dev->flags & IFF_UP)
347 return netdev_priv(dev);
Timo Teras749c10f2009-01-19 17:22:12 -0800348
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 return NULL;
350}
351
Eric Dumazet15078502010-09-15 11:07:53 +0000352static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700353 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900355 __be32 remote = parms->iph.daddr;
356 __be32 local = parms->iph.saddr;
357 __be32 key = parms->i_key;
Eric Dumazet15078502010-09-15 11:07:53 +0000358 unsigned int h = HASH(key);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700359 int prio = 0;
360
361 if (local)
362 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800363 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364 prio |= 2;
365 h ^= HASH(remote);
366 }
367
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700368 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369}
370
Eric Dumazet15078502010-09-15 11:07:53 +0000371static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700372 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900373{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700374 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900375}
376
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700377static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378{
Eric Dumazet15078502010-09-15 11:07:53 +0000379 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700380
Eric Dumazet15078502010-09-15 11:07:53 +0000381 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000382 rcu_assign_pointer(*tp, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383}
384
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700385static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386{
Eric Dumazet15078502010-09-15 11:07:53 +0000387 struct ip_tunnel __rcu **tp;
388 struct ip_tunnel *iter;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389
Eric Dumazet15078502010-09-15 11:07:53 +0000390 for (tp = ipgre_bucket(ign, t);
391 (iter = rtnl_dereference(*tp)) != NULL;
392 tp = &iter->next) {
393 if (t == iter) {
394 rcu_assign_pointer(*tp, t->next);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 break;
396 }
397 }
398}
399
Herbert Xue1a80002008-10-09 12:00:17 -0700400static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
401 struct ip_tunnel_parm *parms,
402 int type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403{
Al Virod5a0a1e2006-11-08 00:23:14 -0800404 __be32 remote = parms->iph.daddr;
405 __be32 local = parms->iph.saddr;
406 __be32 key = parms->i_key;
Timo Teras749c10f2009-01-19 17:22:12 -0800407 int link = parms->link;
Eric Dumazet15078502010-09-15 11:07:53 +0000408 struct ip_tunnel *t;
409 struct ip_tunnel __rcu **tp;
Herbert Xue1a80002008-10-09 12:00:17 -0700410 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
411
Eric Dumazet15078502010-09-15 11:07:53 +0000412 for (tp = __ipgre_bucket(ign, parms);
413 (t = rtnl_dereference(*tp)) != NULL;
414 tp = &t->next)
Herbert Xue1a80002008-10-09 12:00:17 -0700415 if (local == t->parms.iph.saddr &&
416 remote == t->parms.iph.daddr &&
417 key == t->parms.i_key &&
Timo Teras749c10f2009-01-19 17:22:12 -0800418 link == t->parms.link &&
Herbert Xue1a80002008-10-09 12:00:17 -0700419 type == t->dev->type)
420 break;
421
422 return t;
423}
424
Eric Dumazet15078502010-09-15 11:07:53 +0000425static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
Herbert Xue1a80002008-10-09 12:00:17 -0700426 struct ip_tunnel_parm *parms, int create)
427{
428 struct ip_tunnel *t, *nt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700431 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432
Herbert Xue1a80002008-10-09 12:00:17 -0700433 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
434 if (t || !create)
435 return t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436
437 if (parms->name[0])
438 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba62008-02-23 20:19:20 -0800439 else
stephen hemminger407d6fc2010-11-29 09:47:47 +0000440 strcpy(name, "gre%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441
442 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
443 if (!dev)
stephen hemminger407d6fc2010-11-29 09:47:47 +0000444 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -0700446 dev_net_set(dev, net);
447
Patrick McHardy2941a482006-01-08 22:05:26 -0800448 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449 nt->parms = *parms;
Herbert Xuc19e6542008-10-09 11:59:55 -0700450 dev->rtnl_link_ops = &ipgre_link_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451
Herbert Xu42aa9162008-10-09 11:59:32 -0700452 dev->mtu = ipgre_tunnel_bind_dev(dev);
453
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800454 if (register_netdevice(dev) < 0)
455 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456
Willem de Bruijnf2b3ee92012-01-26 10:34:35 +0000457 /* Can use a lockless transmit, unless we generate output sequences */
458 if (!(nt->parms.o_flags & GRE_SEQ))
459 dev->features |= NETIF_F_LLTX;
460
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700462 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 return nt;
464
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800465failed_free:
466 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467 return NULL;
468}
469
470static void ipgre_tunnel_uninit(struct net_device *dev)
471{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700472 struct net *net = dev_net(dev);
473 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
474
475 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 dev_put(dev);
477}
478
479
480static void ipgre_err(struct sk_buff *skb, u32 info)
481{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482
Rami Rosen071f92d2008-05-21 17:47:54 -0700483/* All the routers (except for Linux) return only
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484 8 bytes of packet payload. It means, that precise relaying of
485 ICMP in the real Internet is absolutely infeasible.
486
487 Moreover, Cisco "wise men" put GRE key to the third word
488 in GRE header. It makes impossible maintaining even soft state for keyed
489 GRE tunnels with enabled checksum. Tell them "thank you".
490
491 Well, I wonder, rfc1812 was written by Cisco employee,
stephen hemmingerbff52852012-02-24 08:08:20 +0000492 what the hell these idiots break standards established
493 by themselves???
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494 */
495
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000496 const struct iphdr *iph = (const struct iphdr *)skb->data;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000497 __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300499 const int type = icmp_hdr(skb)->type;
500 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800502 __be16 flags;
stephen hemmingerd2083282012-09-24 18:12:23 +0000503 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504
505 flags = p[0];
506 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
507 if (flags&(GRE_VERSION|GRE_ROUTING))
508 return;
509 if (flags&GRE_KEY) {
510 grehlen += 4;
511 if (flags&GRE_CSUM)
512 grehlen += 4;
513 }
514 }
515
516 /* If only 8 bytes returned, keyed message will be dropped here */
517 if (skb_headlen(skb) < grehlen)
518 return;
519
stephen hemmingerd2083282012-09-24 18:12:23 +0000520 if (flags & GRE_KEY)
521 key = *(((__be32 *)p) + (grehlen / 4) - 1);
522
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523 switch (type) {
524 default:
525 case ICMP_PARAMETERPROB:
526 return;
527
528 case ICMP_DEST_UNREACH:
529 switch (code) {
530 case ICMP_SR_FAILED:
531 case ICMP_PORT_UNREACH:
532 /* Impossible event. */
533 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534 default:
535 /* All others are translated to HOST_UNREACH.
536 rfc2003 contains "deep thoughts" about NET_UNREACH,
537 I believe they are just ether pollution. --ANK
538 */
539 break;
540 }
541 break;
542 case ICMP_TIME_EXCEEDED:
543 if (code != ICMP_EXC_TTL)
544 return;
545 break;
David S. Miller55be7a92012-07-11 21:27:49 -0700546
547 case ICMP_REDIRECT:
548 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 }
550
Timo Teras749c10f2009-01-19 17:22:12 -0800551 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
stephen hemmingerd2083282012-09-24 18:12:23 +0000552 flags, key, p[1]);
553
David S. Miller36393392012-06-14 22:21:46 -0700554 if (t == NULL)
stephen hemminger0c5794a2012-09-24 18:12:24 +0000555 return;
David S. Miller36393392012-06-14 22:21:46 -0700556
557 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
558 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
559 t->parms.link, 0, IPPROTO_GRE, 0);
stephen hemminger0c5794a2012-09-24 18:12:24 +0000560 return;
David S. Miller36393392012-06-14 22:21:46 -0700561 }
David S. Miller55be7a92012-07-11 21:27:49 -0700562 if (type == ICMP_REDIRECT) {
563 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
564 IPPROTO_GRE, 0);
stephen hemminger0c5794a2012-09-24 18:12:24 +0000565 return;
David S. Miller55be7a92012-07-11 21:27:49 -0700566 }
David S. Miller36393392012-06-14 22:21:46 -0700567 if (t->parms.iph.daddr == 0 ||
Joe Perchesf97c1e02007-12-16 13:45:43 -0800568 ipv4_is_multicast(t->parms.iph.daddr))
stephen hemminger0c5794a2012-09-24 18:12:24 +0000569 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570
571 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
stephen hemminger0c5794a2012-09-24 18:12:24 +0000572 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700573
Wei Yongjunda6185d82009-02-24 23:34:48 -0800574 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575 t->err_count++;
576 else
577 t->err_count = 1;
578 t->err_time = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579}
580
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581static inline u8
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000582ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583{
584 u8 inner = 0;
585 if (skb->protocol == htons(ETH_P_IP))
586 inner = old_iph->tos;
587 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000588 inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589 return INET_ECN_encapsulate(tos, inner);
590}
591
592static int ipgre_rcv(struct sk_buff *skb)
593{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000594 const struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800596 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800597 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800598 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599 u32 seqno = 0;
600 struct ip_tunnel *tunnel;
601 int offset = 4;
Herbert Xue1a80002008-10-09 12:00:17 -0700602 __be16 gre_proto;
stephen hemmingereccc1bb2012-09-25 11:02:48 +0000603 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604
605 if (!pskb_may_pull(skb, 16))
stephen hemminger0c5794a2012-09-24 18:12:24 +0000606 goto drop;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700608 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 h = skb->data;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000610 flags = *(__be16 *)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611
612 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
613 /* - Version must be 0.
614 - We do not support routing headers.
615 */
616 if (flags&(GRE_VERSION|GRE_ROUTING))
stephen hemminger0c5794a2012-09-24 18:12:24 +0000617 goto drop;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618
619 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800620 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700621 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800622 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800623 if (!csum)
624 break;
625 /* fall through */
626 case CHECKSUM_NONE:
627 skb->csum = 0;
628 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700629 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630 }
631 offset += 4;
632 }
633 if (flags&GRE_KEY) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000634 key = *(__be32 *)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635 offset += 4;
636 }
637 if (flags&GRE_SEQ) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000638 seqno = ntohl(*(__be32 *)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639 offset += 4;
640 }
641 }
642
Herbert Xue1a80002008-10-09 12:00:17 -0700643 gre_proto = *(__be16 *)(h + 2);
644
stephen hemmingerd2083282012-09-24 18:12:23 +0000645 tunnel = ipgre_tunnel_lookup(skb->dev,
646 iph->saddr, iph->daddr, flags, key,
647 gre_proto);
648 if (tunnel) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000649 struct pcpu_tstats *tstats;
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700650
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651 secpath_reset(skb);
652
Herbert Xue1a80002008-10-09 12:00:17 -0700653 skb->protocol = gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 /* WCCP version 1 and 2 protocol decoding.
655 * - Change protocol to IP
656 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
657 */
Herbert Xue1a80002008-10-09 12:00:17 -0700658 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700659 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900660 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661 offset += 4;
662 }
663
Timo Teras1d069162007-12-20 00:10:33 -0800664 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300665 __pskb_pull(skb, offset);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700666 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667 skb->pkt_type = PACKET_HOST;
668#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800669 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670 /* Looped back packet, drop it! */
David S. Millerc7537962010-11-11 17:07:48 -0800671 if (rt_is_output_route(skb_rtable(skb)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672 goto drop;
Eric Dumazete985aad2010-09-27 03:57:11 +0000673 tunnel->dev->stats.multicast++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 skb->pkt_type = PACKET_BROADCAST;
675 }
676#endif
677
678 if (((flags&GRE_CSUM) && csum) ||
679 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000680 tunnel->dev->stats.rx_crc_errors++;
681 tunnel->dev->stats.rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682 goto drop;
683 }
684 if (tunnel->parms.i_flags&GRE_SEQ) {
685 if (!(flags&GRE_SEQ) ||
686 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000687 tunnel->dev->stats.rx_fifo_errors++;
688 tunnel->dev->stats.rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689 goto drop;
690 }
691 tunnel->i_seqno = seqno + 1;
692 }
Herbert Xue1a80002008-10-09 12:00:17 -0700693
694 /* Warning: All skb pointers will be invalidated! */
695 if (tunnel->dev->type == ARPHRD_ETHER) {
696 if (!pskb_may_pull(skb, ETH_HLEN)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000697 tunnel->dev->stats.rx_length_errors++;
698 tunnel->dev->stats.rx_errors++;
Herbert Xue1a80002008-10-09 12:00:17 -0700699 goto drop;
700 }
701
702 iph = ip_hdr(skb);
703 skb->protocol = eth_type_trans(skb, tunnel->dev);
704 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
705 }
706
stephen hemmingereccc1bb2012-09-25 11:02:48 +0000707 __skb_tunnel_rx(skb, tunnel->dev);
708
709 skb_reset_network_header(skb);
710 err = IP_ECN_decapsulate(iph, skb);
711 if (unlikely(err)) {
712 if (log_ecn_error)
713 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
714 &iph->saddr, iph->tos);
715 if (err > 1) {
716 ++tunnel->dev->stats.rx_frame_errors;
717 ++tunnel->dev->stats.rx_errors;
718 goto drop;
719 }
720 }
721
Eric Dumazete985aad2010-09-27 03:57:11 +0000722 tstats = this_cpu_ptr(tunnel->dev->tstats);
stephen hemminger87b6d212012-04-12 06:31:16 +0000723 u64_stats_update_begin(&tstats->syncp);
Eric Dumazete985aad2010-09-27 03:57:11 +0000724 tstats->rx_packets++;
725 tstats->rx_bytes += skb->len;
stephen hemminger87b6d212012-04-12 06:31:16 +0000726 u64_stats_update_end(&tstats->syncp);
Eric Dumazete985aad2010-09-27 03:57:11 +0000727
Eric Dumazet60769a52012-09-27 02:48:50 +0000728 gro_cells_receive(&tunnel->gro_cells, skb);
Eric Dumazet8990f462010-09-20 00:12:11 +0000729 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700731 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732
733drop:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734 kfree_skb(skb);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000735 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736}
737
Pravin B Shelar8f100982013-02-24 20:05:05 +0000738static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb)
Pravin B Shelar68c33162013-02-14 14:02:41 +0000739{
740 int err;
741
742 if (skb_is_gso(skb)) {
743 err = skb_unclone(skb, GFP_ATOMIC);
744 if (unlikely(err))
745 goto error;
746 skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
747 return skb;
Pravin B Shelar8f100982013-02-24 20:05:05 +0000748 } else if (skb->ip_summed == CHECKSUM_PARTIAL &&
749 tunnel->parms.o_flags&GRE_CSUM) {
750 err = skb_checksum_help(skb);
751 if (unlikely(err))
752 goto error;
753 } else if (skb->ip_summed != CHECKSUM_PARTIAL)
Dmitry Kravkovaa0e51c2013-02-18 09:50:52 +0000754 skb->ip_summed = CHECKSUM_NONE;
Pravin B Shelar68c33162013-02-14 14:02:41 +0000755
756 return skb;
757
758error:
759 kfree_skb(skb);
760 return ERR_PTR(err);
761}
762
/*
 * ipgre_tunnel_xmit - transmit one packet through a GRE tunnel device.
 * @skb: packet to encapsulate
 * @dev: GRE tunnel device the packet was queued on
 *
 * Resolves the outer destination (from the tunnel parameters, or from the
 * inner packet for NBMA tunnels), routes it, applies path-MTU checks, makes
 * sure there is enough headroom, then writes the outer IPv4 header and the
 * GRE header and passes the result to ip_local_out().
 *
 * Every return in this function is NETDEV_TX_OK; failures are reported only
 * through the dev->stats counters.
 */
Stephen Hemminger6fef4c02009-08-31 19:50:41 +0000763static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764{
Pravin B Shelar68c33162013-02-14 14:02:41 +0000765 struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);
Patrick McHardy2941a482006-01-08 22:05:26 -0800766 struct ip_tunnel *tunnel = netdev_priv(dev);
Eric Dumazetcef401d2013-01-25 20:34:37 +0000767 const struct iphdr *old_iph;
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000768 const struct iphdr *tiph;
David S. Millercbb1e852011-05-04 12:33:34 -0700769 struct flowi4 fl4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800771 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772 struct rtable *rt; /* Route to the other host */
Eric Dumazet15078502010-09-15 11:07:53 +0000773 struct net_device *tdev; /* Device to other host */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700775 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800777 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778 int mtu;
Eric Dumazetf7e75ba2012-12-20 16:00:27 +0000779 u8 ttl;
Pravin B Shelar68c33162013-02-14 14:02:41 +0000780 int err;
781 int pkt_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700782
 /* handle_offloads() has already freed the skb when it returns an error. */
Pravin B Shelar8f100982013-02-24 20:05:05 +0000783 skb = handle_offloads(tunnel, skb);
Pravin B Shelar68c33162013-02-14 14:02:41 +0000784 if (IS_ERR(skb)) {
785 dev->stats.tx_dropped++;
786 return NETDEV_TX_OK;
787 }
788
789 if (!skb->encapsulation) {
790 skb_reset_inner_headers(skb);
791 skb->encapsulation = 1;
792 }
Eric Dumazet6b78f162012-09-13 21:25:33 +0000793
Eric Dumazetcef401d2013-01-25 20:34:37 +0000794 old_iph = ip_hdr(skb);
795
Herbert Xue1a80002008-10-09 12:00:17 -0700796 if (dev->type == ARPHRD_ETHER)
797 IPCB(skb)->flags = 0;
798
 /*
  * With header_ops on an ARPHRD_IPGRE device no GRE header is pushed here
  * (gre_hlen = 0) and, for IPv4 payloads, the template header is read from
  * the start of the skb data; presumably the header was already built by
  * the header_ops create hook - TODO confirm against the caller.
  */
799 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800 gre_hlen = 0;
Isaku Yamahata412ed942012-12-20 15:12:52 +0000801 if (skb->protocol == htons(ETH_P_IP))
802 tiph = (const struct iphdr *)skb->data;
803 else
804 tiph = &tunnel->parms.iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805 } else {
806 gre_hlen = tunnel->hlen;
807 tiph = &tunnel->parms.iph;
808 }
809
 /* No fixed remote: derive the outer destination from the inner packet. */
810 if ((dst = tiph->daddr) == 0) {
811 /* NBMA tunnel */
812
Eric Dumazetadf30902009-06-02 05:19:30 +0000813 if (skb_dst(skb) == NULL) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000814 dev->stats.tx_fifo_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 goto tx_error;
816 }
817
David S. Miller61d57f82012-01-24 18:23:30 -0500818 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazet511c3f92009-06-02 05:14:27 +0000819 rt = skb_rtable(skb);
David S. Millerf8126f12012-07-13 05:03:45 -0700820 dst = rt_nexthop(rt, old_iph->daddr);
David S. Miller61d57f82012-01-24 18:23:30 -0500821 }
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000822#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000824 const struct in6_addr *addr6;
David S. Miller0ec88662012-01-27 15:01:08 -0800825 struct neighbour *neigh;
826 bool do_tx_error_icmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827 int addr_type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828
David S. Miller0ec88662012-01-27 15:01:08 -0800829 neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830 if (neigh == NULL)
831 goto tx_error;
832
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000833 addr6 = (const struct in6_addr *)&neigh->primary_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834 addr_type = ipv6_addr_type(addr6);
835
836 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700837 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700838 addr_type = ipv6_addr_type(addr6);
839 }
840
841 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
David S. Miller0ec88662012-01-27 15:01:08 -0800842 do_tx_error_icmp = true;
843 else {
844 do_tx_error_icmp = false;
845 dst = addr6->s6_addr32[3];
846 }
847 neigh_release(neigh);
848 if (do_tx_error_icmp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849 goto tx_error_icmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850 }
851#endif
852 else
853 goto tx_error;
854 }
855
 /*
  * ttl is copied out now because tiph may point into skb data that is
  * released if the skb is reallocated further down.
  */
Eric Dumazetf7e75ba2012-12-20 16:00:27 +0000856 ttl = tiph->ttl;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700857 tos = tiph->tos;
 /*
  * The low bit of the configured TOS acts as an "inherit" flag: when set
  * it is cleared and the TOS is taken from the inner IPv4/IPv6 header.
  */
David Ward040468a2013-01-27 13:04:58 +0000858 if (tos & 0x1) {
859 tos &= ~0x1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860 if (skb->protocol == htons(ETH_P_IP))
861 tos = old_iph->tos;
Stephen Hemmingerdd4ba832010-07-08 21:35:58 -0700862 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000863 tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864 }
865
David S. Millercbb1e852011-05-04 12:33:34 -0700866 rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
David S. Miller78fbfd82011-03-12 00:00:52 -0500867 tunnel->parms.o_key, RT_TOS(tos),
868 tunnel->parms.link);
869 if (IS_ERR(rt)) {
870 dev->stats.tx_carrier_errors++;
871 goto tx_error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700873 tdev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700874
 /* The route leads back into this tunnel device: drop, counted as a collision. */
875 if (tdev == dev) {
876 ip_rt_put(rt);
Eric Dumazete985aad2010-09-27 03:57:11 +0000877 dev->stats.collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700878 goto tx_error;
879 }
880
 /*
  * With DF configured on the tunnel the usable MTU is the route MTU minus
  * the encapsulation overhead; otherwise the MTU of the inner route (or of
  * the tunnel device) is used unchanged.
  */
881 df = tiph->frag_off;
882 if (df)
Changli Gaod8d1f302010-06-10 23:31:35 -0700883 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884 else
Eric Dumazetadf30902009-06-02 05:19:30 +0000885 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886
Eric Dumazetadf30902009-06-02 05:19:30 +0000887 if (skb_dst(skb))
David S. Miller6700c272012-07-17 03:29:28 -0700888 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889
890 if (skb->protocol == htons(ETH_P_IP)) {
891 df |= (old_iph->frag_off&htons(IP_DF));
892
Pravin B Shelar68c33162013-02-14 14:02:41 +0000893 if (!skb_is_gso(skb) &&
894 (old_iph->frag_off&htons(IP_DF)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 mtu < ntohs(old_iph->tot_len)) {
896 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
897 ip_rt_put(rt);
898 goto tx_error;
899 }
900 }
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000901#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902 else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazetadf30902009-06-02 05:19:30 +0000903 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904
Eric Dumazetadf30902009-06-02 05:19:30 +0000905 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800906 if ((tunnel->parms.iph.daddr &&
907 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908 rt6->rt6i_dst.plen == 128) {
909 rt6->rt6i_flags |= RTF_MODIFIED;
David S. Millerdefb3512010-12-08 21:16:57 -0800910 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911 }
912 }
913
Pravin B Shelar68c33162013-02-14 14:02:41 +0000914 if (!skb_is_gso(skb) &&
915 mtu >= IPV6_MIN_MTU &&
916 mtu < skb->len - tunnel->hlen + gre_hlen) {
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000917 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 ip_rt_put(rt);
919 goto tx_error;
920 }
921 }
922#endif
923
 /*
  * While errors are recorded and still within IPTUNNEL_ERR_TIMEO of
  * err_time, consume one count and signal link failure for this skb;
  * once the window has passed the count is reset.  Transmission goes on
  * in either case.
  */
924 if (tunnel->err_count > 0) {
Wei Yongjunda6185d82009-02-24 23:34:48 -0800925 if (time_before(jiffies,
926 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927 tunnel->err_count--;
928
929 dst_link_failure(skb);
930 } else
931 tunnel->err_count = 0;
932 }
933
Changli Gaod8d1f302010-06-10 23:31:35 -0700934 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935
 /*
  * Reallocate when the headroom is too small or the skb data cannot be
  * written in place; needed_headroom is raised so later packets are more
  * likely to arrive with enough room.
  */
Patrick McHardycfbba492007-07-09 15:33:40 -0700936 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
937 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700938 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
Herbert Xu805dc1d2011-11-18 02:20:06 +0000939 if (max_headroom > dev->needed_headroom)
940 dev->needed_headroom = max_headroom;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941 if (!new_skb) {
942 ip_rt_put(rt);
Eric Dumazete985aad2010-09-27 03:57:11 +0000943 dev->stats.tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000945 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946 }
947 if (skb->sk)
948 skb_set_owner_w(new_skb, skb->sk);
949 dev_kfree_skb(skb);
950 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700951 old_iph = ip_hdr(skb);
Eric Dumazetf7e75ba2012-12-20 16:00:27 +0000952 /* Warning : tiph value might point to freed memory */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 }
954
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700955 skb_push(skb, gre_hlen);
956 skb_reset_network_header(skb);
Isaku Yamahata861aa6d2012-12-24 16:51:03 +0000957 skb_set_transport_header(skb, sizeof(*iph));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700958 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800959 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
960 IPSKB_REROUTED);
 /* From here on the skb holds the route reference taken above. */
Eric Dumazetadf30902009-06-02 05:19:30 +0000961 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700962 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963
964 /*
965 * Push down and install the IPIP header.
966 */
967
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700968 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969 iph->version = 4;
970 iph->ihl = sizeof(struct iphdr) >> 2;
971 iph->frag_off = df;
972 iph->protocol = IPPROTO_GRE;
973 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
David S. Millercbb1e852011-05-04 12:33:34 -0700974 iph->daddr = fl4.daddr;
975 iph->saddr = fl4.saddr;
Eric Dumazetf7e75ba2012-12-20 16:00:27 +0000976 iph->ttl = ttl;
Pravin B Shelar490ab082013-02-22 07:30:30 +0000977
978 tunnel_ip_select_ident(skb, old_iph, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979
 /* A configured TTL of 0 means: inherit from the inner header, else use the route default. */
Eric Dumazetf7e75ba2012-12-20 16:00:27 +0000980 if (ttl == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 if (skb->protocol == htons(ETH_P_IP))
982 iph->ttl = old_iph->ttl;
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000983#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000985 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986#endif
987 else
David S. Miller323e1262010-12-12 21:55:08 -0800988 iph->ttl = ip4_dst_hoplimit(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989 }
990
 /* First two 16-bit words after the IP header: GRE flags, then protocol type. */
Herbert Xue1a80002008-10-09 12:00:17 -0700991 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
992 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
993 htons(ETH_P_TEB) : skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994
 /*
  * Optional fields are filled from the end of the tunnel header backwards:
  * sequence number, then key, then checksum.
  */
995 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000996 __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997
998 if (tunnel->parms.o_flags&GRE_SEQ) {
999 ++tunnel->o_seqno;
1000 *ptr = htonl(tunnel->o_seqno);
1001 ptr--;
1002 }
1003 if (tunnel->parms.o_flags&GRE_KEY) {
1004 *ptr = tunnel->parms.o_key;
1005 ptr--;
1006 }
Pravin B Shelar68c33162013-02-14 14:02:41 +00001007 /* Skip GRE checksum if skb is getting offloaded. */
1008 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) &&
1009 (tunnel->parms.o_flags&GRE_CSUM)) {
Pravin B Shelar54657402013-01-23 11:45:42 +00001010 int offset = skb_transport_offset(skb);
1011
 /*
  * NOTE(review): if __skb_linearize() can reallocate the skb head,
  * iph and ptr computed above would be stale when *ptr is written
  * below - confirm and recompute them after linearizing if so.
  */
Pravin B Shelar68c33162013-02-14 14:02:41 +00001012 if (skb_has_shared_frag(skb)) {
1013 err = __skb_linearize(skb);
Eric Dumazet4aa896c2013-02-19 12:14:10 +00001014 if (err)
Pravin B Shelar68c33162013-02-14 14:02:41 +00001015 goto tx_error;
Pravin B Shelar68c33162013-02-14 14:02:41 +00001016 }
1017
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018 *ptr = 0;
Pravin B Shelar54657402013-01-23 11:45:42 +00001019 *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset,
1020 skb->len - offset,
1021 0));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022 }
1023 }
1024
Pravin B Shelar68c33162013-02-14 14:02:41 +00001025 nf_reset(skb);
1026
 /* Length is sampled before ip_local_out(); the skb is not referenced afterwards. */
1027 pkt_len = skb->len - skb_transport_offset(skb);
1028 err = ip_local_out(skb);
1029 if (likely(net_xmit_eval(err) == 0)) {
1030 u64_stats_update_begin(&tstats->syncp);
1031 tstats->tx_bytes += pkt_len;
1032 tstats->tx_packets++;
1033 u64_stats_update_end(&tstats->syncp);
1034 } else {
1035 dev->stats.tx_errors++;
1036 dev->stats.tx_aborted_errors++;
1037 }
Patrick McHardy6ed10652009-06-23 06:03:08 +00001038 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039
David S. Miller496053f2012-01-11 16:46:32 -08001040#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041tx_error_icmp:
1042 dst_link_failure(skb);
David S. Miller496053f2012-01-11 16:46:32 -08001043#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044tx_error:
Eric Dumazete985aad2010-09-27 03:57:11 +00001045 dev->stats.tx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +00001047 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001048}
1049
Herbert Xu42aa9162008-10-09 11:59:32 -07001050static int ipgre_tunnel_bind_dev(struct net_device *dev)
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001051{
1052 struct net_device *tdev = NULL;
1053 struct ip_tunnel *tunnel;
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001054 const struct iphdr *iph;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001055 int hlen = LL_MAX_HEADER;
1056 int mtu = ETH_DATA_LEN;
1057 int addend = sizeof(struct iphdr) + 4;
1058
1059 tunnel = netdev_priv(dev);
1060 iph = &tunnel->parms.iph;
1061
Herbert Xuc95b8192008-10-09 11:58:54 -07001062 /* Guess output device to choose reasonable mtu and needed_headroom */
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001063
1064 if (iph->daddr) {
David S. Millercbb1e852011-05-04 12:33:34 -07001065 struct flowi4 fl4;
1066 struct rtable *rt;
Eric Dumazete985aad2010-09-27 03:57:11 +00001067
David S. Millercbb1e852011-05-04 12:33:34 -07001068 rt = ip_route_output_gre(dev_net(dev), &fl4,
1069 iph->daddr, iph->saddr,
1070 tunnel->parms.o_key,
1071 RT_TOS(iph->tos),
1072 tunnel->parms.link);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001073 if (!IS_ERR(rt)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001074 tdev = rt->dst.dev;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001075 ip_rt_put(rt);
1076 }
Herbert Xue1a80002008-10-09 12:00:17 -07001077
1078 if (dev->type != ARPHRD_ETHER)
1079 dev->flags |= IFF_POINTOPOINT;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001080 }
1081
1082 if (!tdev && tunnel->parms.link)
Pavel Emelyanov96635522008-04-16 01:10:44 -07001083 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001084
1085 if (tdev) {
Herbert Xuc95b8192008-10-09 11:58:54 -07001086 hlen = tdev->hard_header_len + tdev->needed_headroom;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001087 mtu = tdev->mtu;
1088 }
1089 dev->iflink = tunnel->parms.link;
1090
1091 /* Precalculate GRE options length */
1092 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1093 if (tunnel->parms.o_flags&GRE_CSUM)
1094 addend += 4;
1095 if (tunnel->parms.o_flags&GRE_KEY)
1096 addend += 4;
1097 if (tunnel->parms.o_flags&GRE_SEQ)
1098 addend += 4;
1099 }
Herbert Xuc95b8192008-10-09 11:58:54 -07001100 dev->needed_headroom = addend + hlen;
Tom Goff8cdb0452009-08-14 16:33:56 -07001101 mtu -= dev->hard_header_len + addend;
Herbert Xu42aa9162008-10-09 11:59:32 -07001102
1103 if (mtu < 68)
1104 mtu = 68;
1105
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001106 tunnel->hlen = addend;
Pravin B Shelar68c33162013-02-14 14:02:41 +00001107 /* TCP offload with GRE SEQ is not supported. */
1108 if (!(tunnel->parms.o_flags & GRE_SEQ)) {
Dmitry Kravkoveb6b9a82013-02-18 09:50:53 +00001109 /* device supports enc gso offload*/
1110 if (tdev->hw_enc_features & NETIF_F_GRE_GSO) {
1111 dev->features |= NETIF_F_TSO;
1112 dev->hw_features |= NETIF_F_TSO;
1113 } else {
1114 dev->features |= NETIF_F_GSO_SOFTWARE;
1115 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1116 }
Pravin B Shelar68c33162013-02-14 14:02:41 +00001117 }
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001118
Herbert Xu42aa9162008-10-09 11:59:32 -07001119 return mtu;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001120}
1121
/*
 * ipgre_tunnel_ioctl - handle the tunnel configuration ioctls.
 * @dev: device the ioctl was issued on
 * @ifr: request; ifr_ifru.ifru_data points at a user struct ip_tunnel_parm
 * @cmd: SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL or SIOCDELTUNNEL
 *
 * On the per-namespace fallback device the user-supplied parameters select
 * the tunnel to act on; on any other device the device itself is the target.
 *
 * Returns 0 on success or a negative errno: -EFAULT for failed user copies,
 * -EPERM without CAP_NET_ADMIN (or when trying to delete the fallback
 * tunnel), -EINVAL for bad parameters or an unknown command, -EEXIST when
 * the new parameters already belong to another tunnel, -ENOBUFS / -ENOENT
 * when no tunnel could be created / found.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001122static int
1123ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1124{
1125 int err = 0;
1126 struct ip_tunnel_parm p;
1127 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001128 struct net *net = dev_net(dev);
1129 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130
1131 switch (cmd) {
1132 case SIOCGETTUNNEL:
1133 t = NULL;
 /* Fallback device: look the tunnel up by the parameters the user passed in. */
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001134 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1136 err = -EFAULT;
1137 break;
1138 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001139 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140 }
 /* Otherwise, or when nothing matched, report the parameters of dev itself. */
1141 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -08001142 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143 memcpy(&p, &t->parms, sizeof(p));
1144 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1145 err = -EFAULT;
1146 break;
1147
1148 case SIOCADDTUNNEL:
1149 case SIOCCHGTUNNEL:
1150 err = -EPERM;
Eric W. Biederman52e804c2012-11-16 03:03:05 +00001151 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001152 goto done;
1153
1154 err = -EFAULT;
1155 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1156 goto done;
1157
 /*
  * Only a plain 20-byte IPv4/GRE outer header is accepted, no frag_off
  * bits other than DF, and no GRE version or routing flags.
  */
1158 err = -EINVAL;
1159 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1160 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1161 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1162 goto done;
 /* A fixed TTL forces DF on the outer header. */
1163 if (p.iph.ttl)
1164 p.iph.frag_off |= htons(IP_DF);
1165
 /* Keys are ignored unless the matching GRE_KEY flag is set. */
1166 if (!(p.i_flags&GRE_KEY))
1167 p.i_key = 0;
1168 if (!(p.o_flags&GRE_KEY))
1169 p.o_key = 0;
1170
 /* Third argument asks ipgre_tunnel_locate() to create on SIOCADDTUNNEL only. */
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001171 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001173 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174 if (t != NULL) {
 /* The new parameters already identify a different tunnel. */
1175 if (t->dev != dev) {
1176 err = -EEXIST;
1177 break;
1178 }
1179 } else {
Eric Dumazet15078502010-09-15 11:07:53 +00001180 unsigned int nflags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001181
Patrick McHardy2941a482006-01-08 22:05:26 -08001182 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183
Joe Perchesf97c1e02007-12-16 13:45:43 -08001184 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185 nflags = IFF_BROADCAST;
1186 else if (p.iph.daddr)
1187 nflags = IFF_POINTOPOINT;
1188
 /* A change may not switch the device between broadcast and point-to-point. */
1189 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1190 err = -EINVAL;
1191 break;
1192 }
 /*
  * Take the tunnel out of the lookup structure, wait via
  * synchronize_net(), update addresses and keys, then link it back
  * in under the new parameters.
  */
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001193 ipgre_tunnel_unlink(ign, t);
Pavel Emelyanov74b0b852010-10-27 05:43:53 +00001194 synchronize_net();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195 t->parms.iph.saddr = p.iph.saddr;
1196 t->parms.iph.daddr = p.iph.daddr;
1197 t->parms.i_key = p.i_key;
1198 t->parms.o_key = p.o_key;
1199 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1200 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001201 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001202 netdev_state_change(dev);
1203 }
1204 }
1205
1206 if (t) {
1207 err = 0;
1208 if (cmd == SIOCCHGTUNNEL) {
1209 t->parms.iph.ttl = p.iph.ttl;
1210 t->parms.iph.tos = p.iph.tos;
1211 t->parms.iph.frag_off = p.iph.frag_off;
 /*
  * NOTE(review): the rebind below acts on dev, while t may belong
  * to another device when the ioctl was issued on the fallback
  * device - confirm this is intended.
  */
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001212 if (t->parms.link != p.link) {
1213 t->parms.link = p.link;
Herbert Xu42aa9162008-10-09 11:59:32 -07001214 dev->mtu = ipgre_tunnel_bind_dev(dev);
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001215 netdev_state_change(dev);
1216 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217 }
1218 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1219 err = -EFAULT;
1220 } else
1221 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1222 break;
1223
1224 case SIOCDELTUNNEL:
1225 err = -EPERM;
Eric W. Biederman52e804c2012-11-16 03:03:05 +00001226 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227 goto done;
1228
 /*
  * Via the fallback device the victim is looked up by parameters; the
  * fallback tunnel itself may not be deleted.
  */
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001229 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230 err = -EFAULT;
1231 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1232 goto done;
1233 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001234 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235 goto done;
1236 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001237 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001238 goto done;
1239 dev = t->dev;
1240 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08001241 unregister_netdevice(dev);
1242 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243 break;
1244
1245 default:
1246 err = -EINVAL;
1247 }
1248
1249done:
1250 return err;
1251}
1252
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1254{
Patrick McHardy2941a482006-01-08 22:05:26 -08001255 struct ip_tunnel *tunnel = netdev_priv(dev);
Herbert Xuc95b8192008-10-09 11:58:54 -07001256 if (new_mtu < 68 ||
1257 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258 return -EINVAL;
1259 dev->mtu = new_mtu;
1260 return 0;
1261}
1262
Linus Torvalds1da177e2005-04-16 15:20:36 -07001263/* Nice toy. Unfortunately, useless in real life :-)
1264 It allows to construct virtual multiprotocol broadcast "LAN"
1265 over the Internet, provided multicast routing is tuned.
1266
1267
   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001272
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1274 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1275
1276 ping -t 255 224.66.66.66
1277
1278 If nobody answers, mbone does not work.
1279
1280 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1281 ip addr add 10.66.66.<somewhat>/24 dev Universe
1282 ifconfig Universe up
1283 ifconfig Universe add fe80::<Your_real_addr>/10
1284 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1285 ftp 10.66.66.66
1286 ...
1287 ftp fec0:6666:6666::193.233.7.65
1288 ...
1289
1290 */
1291
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001292static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1293 unsigned short type,
Eric Dumazet15078502010-09-15 11:07:53 +00001294 const void *daddr, const void *saddr, unsigned int len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295{
Patrick McHardy2941a482006-01-08 22:05:26 -08001296 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Daniel Baluta5e73ea12012-04-15 01:34:41 +00001298 __be16 *p = (__be16 *)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299
1300 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1301 p[0] = t->parms.o_flags;
1302 p[1] = htons(type);
1303
1304 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001305 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001306 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001307
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308 if (saddr)
1309 memcpy(&iph->saddr, saddr, 4);
Timo Teräs6d55cb92010-03-03 04:01:13 +00001310 if (daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001311 memcpy(&iph->daddr, daddr, 4);
Timo Teräs6d55cb92010-03-03 04:01:13 +00001312 if (iph->daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001314
Linus Torvalds1da177e2005-04-16 15:20:36 -07001315 return -t->hlen;
1316}
1317
Timo Teras6a5f44d2007-10-23 20:31:53 -07001318static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1319{
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001320 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
Timo Teras6a5f44d2007-10-23 20:31:53 -07001321 memcpy(haddr, &iph->saddr, 4);
1322 return 4;
1323}
1324
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001325static const struct header_ops ipgre_header_ops = {
1326 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001327 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001328};
1329
Timo Teras6a5f44d2007-10-23 20:31:53 -07001330#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331static int ipgre_open(struct net_device *dev)
1332{
Patrick McHardy2941a482006-01-08 22:05:26 -08001333 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334
Joe Perchesf97c1e02007-12-16 13:45:43 -08001335 if (ipv4_is_multicast(t->parms.iph.daddr)) {
David S. Millercbb1e852011-05-04 12:33:34 -07001336 struct flowi4 fl4;
1337 struct rtable *rt;
Eric Dumazete985aad2010-09-27 03:57:11 +00001338
David S. Millercbb1e852011-05-04 12:33:34 -07001339 rt = ip_route_output_gre(dev_net(dev), &fl4,
1340 t->parms.iph.daddr,
1341 t->parms.iph.saddr,
1342 t->parms.o_key,
1343 RT_TOS(t->parms.iph.tos),
1344 t->parms.link);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001345 if (IS_ERR(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001346 return -EADDRNOTAVAIL;
Changli Gaod8d1f302010-06-10 23:31:35 -07001347 dev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001348 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001349 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001350 return -EADDRNOTAVAIL;
1351 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001352 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001353 }
1354 return 0;
1355}
1356
1357static int ipgre_close(struct net_device *dev)
1358{
Patrick McHardy2941a482006-01-08 22:05:26 -08001359 struct ip_tunnel *t = netdev_priv(dev);
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001360
Joe Perchesf97c1e02007-12-16 13:45:43 -08001361 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001362 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001363 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Eric Dumazet8723e1b2010-10-19 00:39:26 +00001364 if (in_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001365 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001366 }
1367 return 0;
1368}
1369
1370#endif
1371
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001372static const struct net_device_ops ipgre_netdev_ops = {
1373 .ndo_init = ipgre_tunnel_init,
1374 .ndo_uninit = ipgre_tunnel_uninit,
1375#ifdef CONFIG_NET_IPGRE_BROADCAST
1376 .ndo_open = ipgre_open,
1377 .ndo_stop = ipgre_close,
1378#endif
1379 .ndo_start_xmit = ipgre_tunnel_xmit,
1380 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1381 .ndo_change_mtu = ipgre_tunnel_change_mtu,
stephen hemminger87b6d212012-04-12 06:31:16 +00001382 .ndo_get_stats64 = ipgre_get_stats64,
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001383};
1384
Eric Dumazete985aad2010-09-27 03:57:11 +00001385static void ipgre_dev_free(struct net_device *dev)
1386{
Eric Dumazet60769a52012-09-27 02:48:50 +00001387 struct ip_tunnel *tunnel = netdev_priv(dev);
1388
1389 gro_cells_destroy(&tunnel->gro_cells);
Eric Dumazete985aad2010-09-27 03:57:11 +00001390 free_percpu(dev->tstats);
1391 free_netdev(dev);
1392}
1393
/* Feature bits set on both gre and gretap devices by their setup routines. */
#define GRE_FEATURES \
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
1398
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399static void ipgre_tunnel_setup(struct net_device *dev)
1400{
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001401 dev->netdev_ops = &ipgre_netdev_ops;
Eric Dumazete985aad2010-09-27 03:57:11 +00001402 dev->destructor = ipgre_dev_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403
1404 dev->type = ARPHRD_IPGRE;
Herbert Xuc95b8192008-10-09 11:58:54 -07001405 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001406 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407 dev->flags = IFF_NOARP;
1408 dev->iflink = 0;
1409 dev->addr_len = 4;
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -07001410 dev->features |= NETIF_F_NETNS_LOCAL;
Eric Dumazet108bfa82009-05-28 22:35:10 +00001411 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
Eric Dumazet6b78f162012-09-13 21:25:33 +00001412
1413 dev->features |= GRE_FEATURES;
1414 dev->hw_features |= GRE_FEATURES;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001415}
1416
1417static int ipgre_tunnel_init(struct net_device *dev)
1418{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001419 struct ip_tunnel *tunnel;
1420 struct iphdr *iph;
Eric Dumazet60769a52012-09-27 02:48:50 +00001421 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422
Patrick McHardy2941a482006-01-08 22:05:26 -08001423 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001424 iph = &tunnel->parms.iph;
1425
1426 tunnel->dev = dev;
1427 strcpy(tunnel->parms.name, dev->name);
1428
1429 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1430 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1431
Linus Torvalds1da177e2005-04-16 15:20:36 -07001432 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001433#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001434 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435 if (!iph->saddr)
1436 return -EINVAL;
1437 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001438 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439 }
1440#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001441 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001442 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001443
Eric Dumazete985aad2010-09-27 03:57:11 +00001444 dev->tstats = alloc_percpu(struct pcpu_tstats);
1445 if (!dev->tstats)
1446 return -ENOMEM;
1447
Eric Dumazet60769a52012-09-27 02:48:50 +00001448 err = gro_cells_init(&tunnel->gro_cells, dev);
1449 if (err) {
1450 free_percpu(dev->tstats);
1451 return err;
1452 }
1453
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454 return 0;
1455}
1456
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001457static void ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001458{
Patrick McHardy2941a482006-01-08 22:05:26 -08001459 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001460 struct iphdr *iph = &tunnel->parms.iph;
1461
1462 tunnel->dev = dev;
1463 strcpy(tunnel->parms.name, dev->name);
1464
1465 iph->version = 4;
1466 iph->protocol = IPPROTO_GRE;
1467 iph->ihl = 5;
1468 tunnel->hlen = sizeof(struct iphdr) + 4;
1469
1470 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471}
1472
1473
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001474static const struct gre_protocol ipgre_protocol = {
1475 .handler = ipgre_rcv,
1476 .err_handler = ipgre_err,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477};
1478
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001479static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001480{
1481 int prio;
1482
1483 for (prio = 0; prio < 4; prio++) {
1484 int h;
1485 for (h = 0; h < HASH_SIZE; h++) {
Eric Dumazet15078502010-09-15 11:07:53 +00001486 struct ip_tunnel *t;
1487
1488 t = rtnl_dereference(ign->tunnels[prio][h]);
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001489
1490 while (t != NULL) {
1491 unregister_netdevice_queue(t->dev, head);
Eric Dumazet15078502010-09-15 11:07:53 +00001492 t = rtnl_dereference(t->next);
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001493 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001494 }
1495 }
1496}
1497
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001498static int __net_init ipgre_init_net(struct net *net)
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001499{
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001500 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001501 int err;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001502
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001503 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1504 ipgre_tunnel_setup);
1505 if (!ign->fb_tunnel_dev) {
1506 err = -ENOMEM;
1507 goto err_alloc_dev;
1508 }
Alexey Dobriyanbe77e592008-11-23 17:26:26 -08001509 dev_net_set(ign->fb_tunnel_dev, net);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001510
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001511 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
Herbert Xuc19e6542008-10-09 11:59:55 -07001512 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001513
1514 if ((err = register_netdev(ign->fb_tunnel_dev)))
1515 goto err_reg_dev;
1516
Eric Dumazet3285ee32010-10-30 16:21:28 -07001517 rcu_assign_pointer(ign->tunnels_wc[0],
1518 netdev_priv(ign->fb_tunnel_dev));
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001519 return 0;
1520
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001521err_reg_dev:
Eric Dumazet3285ee32010-10-30 16:21:28 -07001522 ipgre_dev_free(ign->fb_tunnel_dev);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001523err_alloc_dev:
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001524 return err;
1525}
1526
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001527static void __net_exit ipgre_exit_net(struct net *net)
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001528{
1529 struct ipgre_net *ign;
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001530 LIST_HEAD(list);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001531
1532 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001533 rtnl_lock();
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001534 ipgre_destroy_tunnels(ign, &list);
1535 unregister_netdevice_many(&list);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001536 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001537}
1538
1539static struct pernet_operations ipgre_net_ops = {
1540 .init = ipgre_init_net,
1541 .exit = ipgre_exit_net,
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001542 .id = &ipgre_net_id,
1543 .size = sizeof(struct ipgre_net),
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001544};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001545
Herbert Xuc19e6542008-10-09 11:59:55 -07001546static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1547{
1548 __be16 flags;
1549
1550 if (!data)
1551 return 0;
1552
1553 flags = 0;
1554 if (data[IFLA_GRE_IFLAGS])
1555 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1556 if (data[IFLA_GRE_OFLAGS])
1557 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1558 if (flags & (GRE_VERSION|GRE_ROUTING))
1559 return -EINVAL;
1560
1561 return 0;
1562}
1563
Herbert Xue1a80002008-10-09 12:00:17 -07001564static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1565{
1566 __be32 daddr;
1567
1568 if (tb[IFLA_ADDRESS]) {
1569 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1570 return -EINVAL;
1571 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1572 return -EADDRNOTAVAIL;
1573 }
1574
1575 if (!data)
1576 goto out;
1577
1578 if (data[IFLA_GRE_REMOTE]) {
1579 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1580 if (!daddr)
1581 return -EINVAL;
1582 }
1583
1584out:
1585 return ipgre_tunnel_validate(tb, data);
1586}
1587
Herbert Xuc19e6542008-10-09 11:59:55 -07001588static void ipgre_netlink_parms(struct nlattr *data[],
1589 struct ip_tunnel_parm *parms)
1590{
Herbert Xu7bb82d922008-10-11 12:20:15 -07001591 memset(parms, 0, sizeof(*parms));
Herbert Xuc19e6542008-10-09 11:59:55 -07001592
1593 parms->iph.protocol = IPPROTO_GRE;
1594
1595 if (!data)
1596 return;
1597
1598 if (data[IFLA_GRE_LINK])
1599 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1600
1601 if (data[IFLA_GRE_IFLAGS])
1602 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1603
1604 if (data[IFLA_GRE_OFLAGS])
1605 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1606
1607 if (data[IFLA_GRE_IKEY])
1608 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1609
1610 if (data[IFLA_GRE_OKEY])
1611 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1612
1613 if (data[IFLA_GRE_LOCAL])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001614 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001615
1616 if (data[IFLA_GRE_REMOTE])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001617 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001618
1619 if (data[IFLA_GRE_TTL])
1620 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1621
1622 if (data[IFLA_GRE_TOS])
1623 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1624
1625 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1626 parms->iph.frag_off = htons(IP_DF);
1627}
1628
Herbert Xue1a80002008-10-09 12:00:17 -07001629static int ipgre_tap_init(struct net_device *dev)
1630{
1631 struct ip_tunnel *tunnel;
1632
1633 tunnel = netdev_priv(dev);
1634
1635 tunnel->dev = dev;
1636 strcpy(tunnel->parms.name, dev->name);
1637
1638 ipgre_tunnel_bind_dev(dev);
1639
Eric Dumazete985aad2010-09-27 03:57:11 +00001640 dev->tstats = alloc_percpu(struct pcpu_tstats);
1641 if (!dev->tstats)
1642 return -ENOMEM;
1643
Herbert Xue1a80002008-10-09 12:00:17 -07001644 return 0;
1645}
1646
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001647static const struct net_device_ops ipgre_tap_netdev_ops = {
1648 .ndo_init = ipgre_tap_init,
1649 .ndo_uninit = ipgre_tunnel_uninit,
1650 .ndo_start_xmit = ipgre_tunnel_xmit,
1651 .ndo_set_mac_address = eth_mac_addr,
1652 .ndo_validate_addr = eth_validate_addr,
1653 .ndo_change_mtu = ipgre_tunnel_change_mtu,
stephen hemminger87b6d212012-04-12 06:31:16 +00001654 .ndo_get_stats64 = ipgre_get_stats64,
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001655};
1656
Herbert Xue1a80002008-10-09 12:00:17 -07001657static void ipgre_tap_setup(struct net_device *dev)
1658{
1659
1660 ether_setup(dev);
1661
Herbert Xu2e9526b2009-10-30 05:51:48 +00001662 dev->netdev_ops = &ipgre_tap_netdev_ops;
Eric Dumazete985aad2010-09-27 03:57:11 +00001663 dev->destructor = ipgre_dev_free;
Herbert Xue1a80002008-10-09 12:00:17 -07001664
1665 dev->iflink = 0;
1666 dev->features |= NETIF_F_NETNS_LOCAL;
Pravin B Shelar68c33162013-02-14 14:02:41 +00001667
1668 dev->features |= GRE_FEATURES;
1669 dev->hw_features |= GRE_FEATURES;
Herbert Xue1a80002008-10-09 12:00:17 -07001670}
1671
Eric W. Biederman81adee42009-11-08 00:53:51 -08001672static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
Herbert Xuc19e6542008-10-09 11:59:55 -07001673 struct nlattr *data[])
1674{
1675 struct ip_tunnel *nt;
1676 struct net *net = dev_net(dev);
1677 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1678 int mtu;
1679 int err;
1680
1681 nt = netdev_priv(dev);
1682 ipgre_netlink_parms(data, &nt->parms);
1683
Herbert Xue1a80002008-10-09 12:00:17 -07001684 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
Herbert Xuc19e6542008-10-09 11:59:55 -07001685 return -EEXIST;
1686
Herbert Xue1a80002008-10-09 12:00:17 -07001687 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
Danny Kukawkaf2cedb62012-02-15 06:45:39 +00001688 eth_hw_addr_random(dev);
Herbert Xue1a80002008-10-09 12:00:17 -07001689
Herbert Xuc19e6542008-10-09 11:59:55 -07001690 mtu = ipgre_tunnel_bind_dev(dev);
1691 if (!tb[IFLA_MTU])
1692 dev->mtu = mtu;
1693
Eric Dumazetb790e012010-09-27 23:05:47 +00001694 /* Can use a lockless transmit, unless we generate output sequences */
1695 if (!(nt->parms.o_flags & GRE_SEQ))
1696 dev->features |= NETIF_F_LLTX;
1697
Herbert Xuc19e6542008-10-09 11:59:55 -07001698 err = register_netdevice(dev);
1699 if (err)
1700 goto out;
1701
1702 dev_hold(dev);
1703 ipgre_tunnel_link(ign, nt);
1704
1705out:
1706 return err;
1707}
1708
1709static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1710 struct nlattr *data[])
1711{
1712 struct ip_tunnel *t, *nt;
1713 struct net *net = dev_net(dev);
1714 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1715 struct ip_tunnel_parm p;
1716 int mtu;
1717
1718 if (dev == ign->fb_tunnel_dev)
1719 return -EINVAL;
1720
1721 nt = netdev_priv(dev);
1722 ipgre_netlink_parms(data, &p);
1723
1724 t = ipgre_tunnel_locate(net, &p, 0);
1725
1726 if (t) {
1727 if (t->dev != dev)
1728 return -EEXIST;
1729 } else {
Herbert Xuc19e6542008-10-09 11:59:55 -07001730 t = nt;
1731
Herbert Xu2e9526b2009-10-30 05:51:48 +00001732 if (dev->type != ARPHRD_ETHER) {
Eric Dumazet15078502010-09-15 11:07:53 +00001733 unsigned int nflags = 0;
Herbert Xuc19e6542008-10-09 11:59:55 -07001734
Herbert Xu2e9526b2009-10-30 05:51:48 +00001735 if (ipv4_is_multicast(p.iph.daddr))
1736 nflags = IFF_BROADCAST;
1737 else if (p.iph.daddr)
1738 nflags = IFF_POINTOPOINT;
1739
1740 if ((dev->flags ^ nflags) &
1741 (IFF_POINTOPOINT | IFF_BROADCAST))
1742 return -EINVAL;
1743 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001744
1745 ipgre_tunnel_unlink(ign, t);
1746 t->parms.iph.saddr = p.iph.saddr;
1747 t->parms.iph.daddr = p.iph.daddr;
1748 t->parms.i_key = p.i_key;
Herbert Xu2e9526b2009-10-30 05:51:48 +00001749 if (dev->type != ARPHRD_ETHER) {
1750 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1751 memcpy(dev->broadcast, &p.iph.daddr, 4);
1752 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001753 ipgre_tunnel_link(ign, t);
1754 netdev_state_change(dev);
1755 }
1756
1757 t->parms.o_key = p.o_key;
1758 t->parms.iph.ttl = p.iph.ttl;
1759 t->parms.iph.tos = p.iph.tos;
1760 t->parms.iph.frag_off = p.iph.frag_off;
1761
1762 if (t->parms.link != p.link) {
1763 t->parms.link = p.link;
1764 mtu = ipgre_tunnel_bind_dev(dev);
1765 if (!tb[IFLA_MTU])
1766 dev->mtu = mtu;
1767 netdev_state_change(dev);
1768 }
1769
1770 return 0;
1771}
1772
1773static size_t ipgre_get_size(const struct net_device *dev)
1774{
1775 return
1776 /* IFLA_GRE_LINK */
1777 nla_total_size(4) +
1778 /* IFLA_GRE_IFLAGS */
1779 nla_total_size(2) +
1780 /* IFLA_GRE_OFLAGS */
1781 nla_total_size(2) +
1782 /* IFLA_GRE_IKEY */
1783 nla_total_size(4) +
1784 /* IFLA_GRE_OKEY */
1785 nla_total_size(4) +
1786 /* IFLA_GRE_LOCAL */
1787 nla_total_size(4) +
1788 /* IFLA_GRE_REMOTE */
1789 nla_total_size(4) +
1790 /* IFLA_GRE_TTL */
1791 nla_total_size(1) +
1792 /* IFLA_GRE_TOS */
1793 nla_total_size(1) +
1794 /* IFLA_GRE_PMTUDISC */
1795 nla_total_size(1) +
1796 0;
1797}
1798
1799static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1800{
1801 struct ip_tunnel *t = netdev_priv(dev);
1802 struct ip_tunnel_parm *p = &t->parms;
1803
David S. Millerf3756b72012-04-01 20:39:02 -04001804 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1805 nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
1806 nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
1807 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1808 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1809 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1810 nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1811 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1812 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1813 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1814 !!(p->iph.frag_off & htons(IP_DF))))
1815 goto nla_put_failure;
Herbert Xuc19e6542008-10-09 11:59:55 -07001816 return 0;
1817
1818nla_put_failure:
1819 return -EMSGSIZE;
1820}
1821
1822static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1823 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1824 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1825 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1826 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1827 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001828 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1829 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
Herbert Xuc19e6542008-10-09 11:59:55 -07001830 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1831 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1832 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1833};
1834
1835static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1836 .kind = "gre",
1837 .maxtype = IFLA_GRE_MAX,
1838 .policy = ipgre_policy,
1839 .priv_size = sizeof(struct ip_tunnel),
1840 .setup = ipgre_tunnel_setup,
1841 .validate = ipgre_tunnel_validate,
1842 .newlink = ipgre_newlink,
1843 .changelink = ipgre_changelink,
1844 .get_size = ipgre_get_size,
1845 .fill_info = ipgre_fill_info,
1846};
1847
Herbert Xue1a80002008-10-09 12:00:17 -07001848static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1849 .kind = "gretap",
1850 .maxtype = IFLA_GRE_MAX,
1851 .policy = ipgre_policy,
1852 .priv_size = sizeof(struct ip_tunnel),
1853 .setup = ipgre_tap_setup,
1854 .validate = ipgre_tap_validate,
1855 .newlink = ipgre_newlink,
1856 .changelink = ipgre_changelink,
1857 .get_size = ipgre_get_size,
1858 .fill_info = ipgre_fill_info,
1859};
1860
/*
 *	And now the module code and kernel interface.
 */
1864
1865static int __init ipgre_init(void)
1866{
1867 int err;
1868
Joe Perches058bd4d2012-03-11 18:36:11 +00001869 pr_info("GRE over IPv4 tunneling driver\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001870
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001871 err = register_pernet_device(&ipgre_net_ops);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001872 if (err < 0)
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001873 return err;
1874
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001875 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001876 if (err < 0) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001877 pr_info("%s: can't add protocol\n", __func__);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001878 goto add_proto_failed;
1879 }
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001880
Herbert Xuc19e6542008-10-09 11:59:55 -07001881 err = rtnl_link_register(&ipgre_link_ops);
1882 if (err < 0)
1883 goto rtnl_link_failed;
1884
Herbert Xue1a80002008-10-09 12:00:17 -07001885 err = rtnl_link_register(&ipgre_tap_ops);
1886 if (err < 0)
1887 goto tap_ops_failed;
1888
Herbert Xuc19e6542008-10-09 11:59:55 -07001889out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001890 return err;
Herbert Xuc19e6542008-10-09 11:59:55 -07001891
Herbert Xue1a80002008-10-09 12:00:17 -07001892tap_ops_failed:
1893 rtnl_link_unregister(&ipgre_link_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001894rtnl_link_failed:
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001895 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001896add_proto_failed:
1897 unregister_pernet_device(&ipgre_net_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001898 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001899}
1900
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001901static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001902{
Herbert Xue1a80002008-10-09 12:00:17 -07001903 rtnl_link_unregister(&ipgre_tap_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001904 rtnl_link_unregister(&ipgre_link_ops);
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001905 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
Joe Perches058bd4d2012-03-11 18:36:11 +00001906 pr_info("%s: can't remove protocol\n", __func__);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001907 unregister_pernet_device(&ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001908}
1909
1910module_init(ipgre_init);
1911module_exit(ipgre_fini);
1912MODULE_LICENSE("GPL");
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001913MODULE_ALIAS_RTNL_LINK("gre");
1914MODULE_ALIAS_RTNL_LINK("gretap");
Vasiliy Kulikov8909c9ad2011-03-02 00:33:13 +03001915MODULE_ALIAS_NETDEV("gre0");