blob: fef9dbf3af00c91e1d3391865a731c47b289ac7d [file] [log] [blame]
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */
20
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/mm.h>
22#include <linux/module.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090023#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/sysctl.h>
25#include <linux/workqueue.h>
26#include <net/tcp.h>
27#include <net/inet_common.h>
28#include <net/xfrm.h>
29
David S. Millere994b7c2009-11-21 11:22:25 -080030int sysctl_tcp_syncookies __read_mostly = 1;
Glenn Griffinc6aefaf2008-02-07 21:49:26 -080031EXPORT_SYMBOL(sysctl_tcp_syncookies);
32
Brian Haleyab32ea52006-09-22 14:15:41 -070033int sysctl_tcp_abort_on_overflow __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -070034
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -070035struct inet_timewait_death_row tcp_death_row = {
36 .sysctl_max_tw_buckets = NR_FILE * 2,
37 .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
Ingo Molnare4d91912006-07-03 00:24:34 -070038 .death_lock = __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock),
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -070039 .hashinfo = &tcp_hashinfo,
40 .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
41 (unsigned long)&tcp_death_row),
42 .twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work,
David Howells65f27f32006-11-22 14:55:48 +000043 inet_twdr_twkill_work),
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -070044/* Short-time timewait calendar */
45
46 .twcal_hand = -1,
47 .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
48 (unsigned long)&tcp_death_row),
49};
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -070050EXPORT_SYMBOL_GPL(tcp_death_row);
51
David S. Miller3f419d22010-11-29 13:37:14 -080052/* VJ's idea. Save last timestamp seen from this destination
53 * and hold it at least for normal timewait interval to use for duplicate
54 * segment detection in subsequent connections, before they enter synchronized
55 * state.
56 */
57
Eric Dumazeta2a385d2012-05-16 23:15:34 +000058static bool tcp_remember_stamp(struct sock *sk)
David S. Miller3f419d22010-11-29 13:37:14 -080059{
60 const struct inet_connection_sock *icsk = inet_csk(sk);
61 struct tcp_sock *tp = tcp_sk(sk);
62 struct inet_peer *peer;
David S. Miller3f419d22010-11-29 13:37:14 -080063
David S. Miller4670fd82012-06-09 01:25:47 -070064 peer = icsk->icsk_af_ops->get_peer(sk);
David S. Miller3f419d22010-11-29 13:37:14 -080065 if (peer) {
66 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
67 ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
68 peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
69 peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
70 peer->tcp_ts = tp->rx_opt.ts_recent;
71 }
Eric Dumazeta2a385d2012-05-16 23:15:34 +000072 return true;
David S. Miller3f419d22010-11-29 13:37:14 -080073 }
74
Eric Dumazeta2a385d2012-05-16 23:15:34 +000075 return false;
David S. Miller3f419d22010-11-29 13:37:14 -080076}
77
Eric Dumazeta2a385d2012-05-16 23:15:34 +000078static bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
David S. Millerccb7c412010-12-01 18:09:13 -080079{
80 struct sock *sk = (struct sock *) tw;
81 struct inet_peer *peer;
82
83 peer = twsk_getpeer(sk);
84 if (peer) {
85 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
86
87 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
88 ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
89 peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
90 peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
91 peer->tcp_ts = tcptw->tw_ts_recent;
92 }
93 inet_putpeer(peer);
Eric Dumazeta2a385d2012-05-16 23:15:34 +000094 return true;
David S. Millerccb7c412010-12-01 18:09:13 -080095 }
Eric Dumazeta2a385d2012-05-16 23:15:34 +000096 return false;
David S. Millerccb7c412010-12-01 18:09:13 -080097}
98
Eric Dumazeta2a385d2012-05-16 23:15:34 +000099static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100{
101 if (seq == s_win)
Eric Dumazeta2a385d2012-05-16 23:15:34 +0000102 return true;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103 if (after(end_seq, s_win) && before(seq, e_win))
Eric Dumazeta2a385d2012-05-16 23:15:34 +0000104 return true;
Eric Dumazeta02cec22010-09-22 20:43:57 +0000105 return seq == e_win && seq == end_seq;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106}
107
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900108/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109 * * Main purpose of TIME-WAIT state is to close connection gracefully,
110 * when one of ends sits in LAST-ACK or CLOSING retransmitting FIN
111 * (and, probably, tail of data) and one or more our ACKs are lost.
112 * * What is TIME-WAIT timeout? It is associated with maximal packet
113 * lifetime in the internet, which results in wrong conclusion, that
114 * it is set to catch "old duplicate segments" wandering out of their path.
115 * It is not quite correct. This timeout is calculated so that it exceeds
116 * maximal retransmission timeout enough to allow to lose one (or more)
117 * segments sent by peer and our ACKs. This time may be calculated from RTO.
118 * * When TIME-WAIT socket receives RST, it means that another end
119 * finally closed and we are allowed to kill TIME-WAIT too.
120 * * Second purpose of TIME-WAIT is catching old duplicate segments.
121 * Well, certainly it is pure paranoia, but if we load TIME-WAIT
122 * with this semantics, we MUST NOT kill TIME-WAIT state with RSTs.
123 * * If we invented some more clever way to catch duplicates
124 * (f.e. based on PAWS), we could truncate TIME-WAIT to several RTOs.
125 *
126 * The algorithm below is based on FORMAL INTERPRETATION of RFCs.
127 * When you compare it to RFCs, please, read section SEGMENT ARRIVES
128 * from the very beginning.
129 *
130 * NOTE. With recycling (and later with fin-wait-2) TW bucket
131 * is _not_ stateless. It means, that strictly speaking we must
132 * spinlock it. I do not want! Well, probability of misbehaviour
133 * is ridiculously low and, seems, we could use some mb() tricks
134 * to avoid misread sequence numbers, states etc. --ANK
135 */
136enum tcp_tw_status
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700137tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
138 const struct tcphdr *th)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139{
140 struct tcp_options_received tmp_opt;
Eric Dumazetcf533ea2011-10-21 05:22:42 -0400141 const u8 *hash_location;
William Allen Simpson4957faa2009-12-02 18:25:27 +0000142 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
Eric Dumazeta2a385d2012-05-16 23:15:34 +0000143 bool paws_reject = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144
David S. Millerbb5b7c12009-12-15 20:56:42 -0800145 tmp_opt.saw_tstamp = 0;
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700146 if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
David S. Millerbb5b7c12009-12-15 20:56:42 -0800147 tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148
149 if (tmp_opt.saw_tstamp) {
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700150 tmp_opt.ts_recent = tcptw->tw_ts_recent;
151 tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
Ilpo Järvinenc887e6d2009-03-14 14:23:03 +0000152 paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700153 }
154 }
155
156 if (tw->tw_substate == TCP_FIN_WAIT2) {
157 /* Just repeat all the checks of tcp_rcv_state_process() */
158
159 /* Out of window, send ACK */
160 if (paws_reject ||
161 !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700162 tcptw->tw_rcv_nxt,
163 tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 return TCP_TW_ACK;
165
166 if (th->rst)
167 goto kill;
168
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700169 if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700170 goto kill_with_rst;
171
172 /* Dup ACK? */
Wei Yongjun1ac530b2009-06-24 22:29:31 +0000173 if (!th->ack ||
174 !after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175 TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700176 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177 return TCP_TW_SUCCESS;
178 }
179
180 /* New data or FIN. If new data arrive after half-duplex close,
181 * reset.
182 */
183 if (!th->fin ||
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700184 TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700185kill_with_rst:
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -0700186 inet_twsk_deschedule(tw, &tcp_death_row);
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700187 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700188 return TCP_TW_RST;
189 }
190
191 /* FIN arrived, enter true time-wait state. */
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700192 tw->tw_substate = TCP_TIME_WAIT;
193 tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194 if (tmp_opt.saw_tstamp) {
James Morris9d729f72007-03-04 16:12:44 -0800195 tcptw->tw_ts_recent_stamp = get_seconds();
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700196 tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197 }
198
David S. Millerccb7c412010-12-01 18:09:13 -0800199 if (tcp_death_row.sysctl_tw_recycle &&
200 tcptw->tw_ts_recent_stamp &&
201 tcp_tw_remember_stamp(tw))
Arnaldo Carvalho de Melo696ab2d2005-08-09 20:45:03 -0700202 inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout,
203 TCP_TIMEWAIT_LEN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204 else
Arnaldo Carvalho de Melo696ab2d2005-08-09 20:45:03 -0700205 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
206 TCP_TIMEWAIT_LEN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207 return TCP_TW_ACK;
208 }
209
210 /*
211 * Now real TIME-WAIT state.
212 *
213 * RFC 1122:
214 * "When a connection is [...] on TIME-WAIT state [...]
215 * [a TCP] MAY accept a new SYN from the remote TCP to
216 * reopen the connection directly, if it:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900217 *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218 * (1) assigns its initial sequence number for the new
219 * connection to be larger than the largest sequence
220 * number it used on the previous connection incarnation,
221 * and
222 *
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900223 * (2) returns to TIME-WAIT state if the SYN turns out
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224 * to be an old duplicate".
225 */
226
227 if (!paws_reject &&
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700228 (TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229 (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) {
230 /* In window segment, it may be only reset or bare ack. */
231
232 if (th->rst) {
Stephen Hemmingercaa20d9a2005-11-10 17:13:47 -0800233 /* This is TIME_WAIT assassination, in two flavors.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234 * Oh well... nobody has a sufficient solution to this
235 * protocol bug yet.
236 */
237 if (sysctl_tcp_rfc1337 == 0) {
238kill:
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -0700239 inet_twsk_deschedule(tw, &tcp_death_row);
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700240 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241 return TCP_TW_SUCCESS;
242 }
243 }
Arnaldo Carvalho de Melo696ab2d2005-08-09 20:45:03 -0700244 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
245 TCP_TIMEWAIT_LEN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246
247 if (tmp_opt.saw_tstamp) {
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700248 tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
James Morris9d729f72007-03-04 16:12:44 -0800249 tcptw->tw_ts_recent_stamp = get_seconds();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250 }
251
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700252 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253 return TCP_TW_SUCCESS;
254 }
255
256 /* Out of window segment.
257
258 All the segments are ACKed immediately.
259
260 The only exception is new SYN. We accept it, if it is
261 not old duplicate and we are not in danger to be killed
262 by delayed old duplicates. RFC check is that it has
263 newer sequence number works at rates <40Mbit/sec.
264 However, if paws works, it is reliable AND even more,
265 we even may relax silly seq space cutoff.
266
267 RED-PEN: we violate main RFC requirement, if this SYN will appear
268 old duplicate (i.e. we receive RST in reply to SYN-ACK),
269 we must return socket to time-wait state. It is not good,
270 but not fatal yet.
271 */
272
273 if (th->syn && !th->rst && !th->ack && !paws_reject &&
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700274 (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) ||
275 (tmp_opt.saw_tstamp &&
276 (s32)(tcptw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) {
277 u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 if (isn == 0)
279 isn++;
280 TCP_SKB_CB(skb)->when = isn;
281 return TCP_TW_SYN;
282 }
283
284 if (paws_reject)
Pavel Emelyanovde0744a2008-07-16 20:31:16 -0700285 NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286
Stephen Hemminger2de979b2007-03-08 20:45:19 -0800287 if (!th->rst) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288 /* In this case we must reset the TIMEWAIT timer.
289 *
290 * If it is ACKless SYN it may be both old duplicate
291 * and new good SYN with random sequence number <rcv_nxt.
292 * Do not reschedule in the last case.
293 */
294 if (paws_reject || th->ack)
Arnaldo Carvalho de Melo696ab2d2005-08-09 20:45:03 -0700295 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
296 TCP_TIMEWAIT_LEN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700297
298 /* Send ACK. Note, we do not put the bucket,
299 * it will be released by caller.
300 */
301 return TCP_TW_ACK;
302 }
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700303 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304 return TCP_TW_SUCCESS;
305}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000306EXPORT_SYMBOL(tcp_timewait_state_process);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900308/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309 * Move a socket to time-wait or dead fin-wait-2 state.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900310 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311void tcp_time_wait(struct sock *sk, int state, int timeo)
312{
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700313 struct inet_timewait_sock *tw = NULL;
Arnaldo Carvalho de Melo8292a172005-12-13 23:15:52 -0800314 const struct inet_connection_sock *icsk = inet_csk(sk);
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700315 const struct tcp_sock *tp = tcp_sk(sk);
Eric Dumazeta2a385d2012-05-16 23:15:34 +0000316 bool recycle_ok = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -0700318 if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
David S. Miller3f419d22010-11-29 13:37:14 -0800319 recycle_ok = tcp_remember_stamp(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -0700321 if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
Arnaldo Carvalho de Meloc6762702005-08-09 20:09:59 -0700322 tw = inet_twsk_alloc(sk, state);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700324 if (tw != NULL) {
325 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700326 const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700327
KOVACS Krisztian58af19e2011-10-18 10:17:35 +0000328 tw->tw_transparent = inet_sk(sk)->transparent;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329 tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700330 tcptw->tw_rcv_nxt = tp->rcv_nxt;
331 tcptw->tw_snd_nxt = tp->snd_nxt;
332 tcptw->tw_rcv_wnd = tcp_receive_window(tp);
333 tcptw->tw_ts_recent = tp->rx_opt.ts_recent;
334 tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000336#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337 if (tw->tw_family == PF_INET6) {
338 struct ipv6_pinfo *np = inet6_sk(sk);
Arnaldo Carvalho de Melo0fa1a532005-12-13 23:23:09 -0800339 struct inet6_timewait_sock *tw6;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340
Arnaldo Carvalho de Melo0fa1a532005-12-13 23:23:09 -0800341 tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
342 tw6 = inet6_twsk((struct sock *)tw);
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000343 tw6->tw_v6_daddr = np->daddr;
344 tw6->tw_v6_rcv_saddr = np->rcv_saddr;
Eric Dumazetb903d322011-10-27 00:44:35 -0400345 tw->tw_tclass = np->tclass;
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700346 tw->tw_ipv6only = np->ipv6only;
Arnaldo Carvalho de Meloc6762702005-08-09 20:09:59 -0700347 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700348#endif
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800349
350#ifdef CONFIG_TCP_MD5SIG
351 /*
352 * The timewait bucket does not have the key DB from the
353 * sock structure. We just make a quick copy of the
354 * md5 key being used (if indeed we are using one)
355 * so the timewait ack generating code has the key.
356 */
357 do {
358 struct tcp_md5sig_key *key;
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000359 tcptw->tw_md5_key = NULL;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800360 key = tp->af_specific->md5_lookup(sk, sk);
361 if (key != NULL) {
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000362 tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
363 if (tcptw->tw_md5_key && tcp_alloc_md5sig_pool(sk) == NULL)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800364 BUG();
365 }
Stephen Hemminger2de979b2007-03-08 20:45:19 -0800366 } while (0);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800367#endif
368
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369 /* Linkage updates. */
Arnaldo Carvalho de Meloe48c4142005-08-09 20:09:46 -0700370 __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371
372 /* Get the TIME_WAIT timeout firing. */
373 if (timeo < rto)
374 timeo = rto;
375
376 if (recycle_ok) {
377 tw->tw_timeout = rto;
378 } else {
379 tw->tw_timeout = TCP_TIMEWAIT_LEN;
380 if (state == TCP_TIME_WAIT)
381 timeo = TCP_TIMEWAIT_LEN;
382 }
383
Arnaldo Carvalho de Melo696ab2d2005-08-09 20:45:03 -0700384 inet_twsk_schedule(tw, &tcp_death_row, timeo,
385 TCP_TIMEWAIT_LEN);
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700386 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 } else {
388 /* Sorry, if we're out of memory, just CLOSE this
389 * socket up. We've got bigger problems than
390 * non-graceful socket closings.
391 */
Tom Herbert67631512010-12-08 12:16:33 -0800392 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393 }
394
395 tcp_update_metrics(sk);
396 tcp_done(sk);
397}
398
/*
 * Destructor for a TCP time-wait socket.
 *
 * With CONFIG_TCP_MD5SIG, a time-wait socket that carries an MD5 key
 * releases the md5sig pool and frees the key through kfree_rcu().
 * Without that option this is a no-op.
 */
void tcp_twsk_destructor(struct sock *sk)
{
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key = tcp_twsk(sk)->tw_md5_key;

	if (!key)
		return;

	tcp_free_md5sig_pool();
	kfree_rcu(key, rcu);
#endif
}
EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
410
Ilpo Järvinenbdf1ee52007-05-27 02:04:16 -0700411static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
412 struct request_sock *req)
413{
414 tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0;
415}
416
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417/* This is not only more efficient than what we used to do, it eliminates
418 * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
419 *
420 * Actually, we could lots of memory writes here. tp of listening
421 * socket contains all necessary default parameters.
422 */
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700423struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424{
Eric Dumazete56c57d2011-11-08 17:07:07 -0500425 struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -0700427 if (newsk != NULL) {
Arnaldo Carvalho de Melo9f1d2602005-08-09 20:11:24 -0700428 const struct inet_request_sock *ireq = inet_rsk(req);
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700429 struct tcp_request_sock *treq = tcp_rsk(req);
Arnaldo Carvalho de Meloa9948a72007-02-28 11:05:56 -0800430 struct inet_connection_sock *newicsk = inet_csk(newsk);
William Allen Simpson435cf552009-12-02 18:17:05 +0000431 struct tcp_sock *newtp = tcp_sk(newsk);
432 struct tcp_sock *oldtp = tcp_sk(sk);
433 struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
434
435 /* TCP Cookie Transactions require space for the cookie pair,
436 * as it differs for each connection. There is no need to
437 * copy any s_data_payload stored at the original socket.
438 * Failure will prevent resuming the connection.
439 *
440 * Presumed copied, in order of appearance:
441 * cookie_in_always, cookie_out_never
442 */
443 if (oldcvp != NULL) {
444 struct tcp_cookie_values *newcvp =
445 kzalloc(sizeof(*newtp->cookie_values),
446 GFP_ATOMIC);
447
448 if (newcvp != NULL) {
449 kref_init(&newcvp->kref);
450 newcvp->cookie_desired =
451 oldcvp->cookie_desired;
452 newtp->cookie_values = newcvp;
453 } else {
454 /* Not Yet Implemented */
455 newtp->cookie_values = NULL;
456 }
457 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700458
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459 /* Now setup tcp_sock */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460 newtp->pred_flags = 0;
William Allen Simpson435cf552009-12-02 18:17:05 +0000461
462 newtp->rcv_wup = newtp->copied_seq =
463 newtp->rcv_nxt = treq->rcv_isn + 1;
464
465 newtp->snd_sml = newtp->snd_una =
466 newtp->snd_nxt = newtp->snd_up =
467 treq->snt_isn + 1 + tcp_s_data_size(oldtp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468
469 tcp_prequeue_init(newtp);
470
Hantzis Fotisee7537b2009-03-02 22:42:02 -0800471 tcp_init_wl(newtp, treq->rcv_isn);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700472
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473 newtp->srtt = 0;
474 newtp->mdev = TCP_TIMEOUT_INIT;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700475 newicsk->icsk_rto = TCP_TIMEOUT_INIT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476
477 newtp->packets_out = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478 newtp->retrans_out = 0;
479 newtp->sacked_out = 0;
480 newtp->fackets_out = 0;
Ilpo Järvinen0b6a05c2009-09-15 01:30:10 -0700481 newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
Yuchung Chengeed530b2012-05-02 13:30:03 +0000482 tcp_enable_early_retrans(newtp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483
484 /* So many TCP implementations out there (incorrectly) count the
485 * initial SYN frame in their delayed-ACK and congestion control
486 * algorithms that we must have the following bandaid to talk
487 * efficiently to them. -DaveM
488 */
Jerry Chu9ad7c042011-06-08 11:08:38 +0000489 newtp->snd_cwnd = TCP_INIT_CWND;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490 newtp->snd_cwnd_cnt = 0;
Stephen Hemminger9772efb2005-11-10 17:09:53 -0800491 newtp->bytes_acked = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492
493 newtp->frto_counter = 0;
494 newtp->frto_highmark = 0;
495
Eric Dumazetd8a6e652011-11-30 01:02:41 +0000496 if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops &&
497 !try_module_get(newicsk->icsk_ca_ops->owner))
498 newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
Stephen Hemminger317a76f2005-06-23 12:19:55 -0700499
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -0300500 tcp_set_ca_state(newsk, TCP_CA_Open);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501 tcp_init_xmit_timers(newsk);
502 skb_queue_head_init(&newtp->out_of_order_queue);
William Allen Simpson435cf552009-12-02 18:17:05 +0000503 newtp->write_seq = newtp->pushed_seq =
504 treq->snt_isn + 1 + tcp_s_data_size(oldtp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505
506 newtp->rx_opt.saw_tstamp = 0;
507
508 newtp->rx_opt.dsack = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509 newtp->rx_opt.num_sacks = 0;
Ilpo Järvinencabeccb2009-02-28 04:44:38 +0000510
Linus Torvalds1da177e2005-04-16 15:20:36 -0700511 newtp->urg_data = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512
Linus Torvalds1da177e2005-04-16 15:20:36 -0700513 if (sock_flag(newsk, SOCK_KEEPOPEN))
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700514 inet_csk_reset_keepalive_timer(newsk,
515 keepalive_time_when(newtp));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700517 newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
Stephen Hemminger2de979b2007-03-08 20:45:19 -0800518 if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519 if (sysctl_tcp_fack)
Ilpo Järvinene60402d2007-08-09 15:14:46 +0300520 tcp_enable_fack(newtp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700521 }
522 newtp->window_clamp = req->window_clamp;
523 newtp->rcv_ssthresh = req->rcv_wnd;
524 newtp->rcv_wnd = req->rcv_wnd;
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700525 newtp->rx_opt.wscale_ok = ireq->wscale_ok;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700526 if (newtp->rx_opt.wscale_ok) {
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700527 newtp->rx_opt.snd_wscale = ireq->snd_wscale;
528 newtp->rx_opt.rcv_wscale = ireq->rcv_wscale;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700529 } else {
530 newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
531 newtp->window_clamp = min(newtp->window_clamp, 65535U);
532 }
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700533 newtp->snd_wnd = (ntohs(tcp_hdr(skb)->window) <<
534 newtp->rx_opt.snd_wscale);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535 newtp->max_window = newtp->snd_wnd;
536
537 if (newtp->rx_opt.tstamp_ok) {
538 newtp->rx_opt.ts_recent = req->ts_recent;
James Morris9d729f72007-03-04 16:12:44 -0800539 newtp->rx_opt.ts_recent_stamp = get_seconds();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540 newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
541 } else {
542 newtp->rx_opt.ts_recent_stamp = 0;
543 newtp->tcp_header_len = sizeof(struct tcphdr);
544 }
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800545#ifdef CONFIG_TCP_MD5SIG
546 newtp->md5sig_info = NULL; /*XXX*/
547 if (newtp->af_specific->md5_lookup(sk, newsk))
548 newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
549#endif
William Allen Simpsonbee7ca92009-11-10 09:51:18 +0000550 if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700551 newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 newtp->rx_opt.mss_clamp = req->mss;
553 TCP_ECN_openreq_child(newtp, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554
Pavel Emelyanov63231bddf2008-07-16 20:22:25 -0700555 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700556 }
557 return newsk;
558}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000559EXPORT_SYMBOL(tcp_create_openreq_child);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700560
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900561/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562 * Process an incoming packet for SYN_RECV sockets represented
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700563 * as a request_sock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700564 */
565
Jianjun Kong5a5f3a82008-11-03 00:24:34 -0800566struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700567 struct request_sock *req,
568 struct request_sock **prev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569{
William Allen Simpson4957faa2009-12-02 18:25:27 +0000570 struct tcp_options_received tmp_opt;
Eric Dumazetcf533ea2011-10-21 05:22:42 -0400571 const u8 *hash_location;
William Allen Simpson4957faa2009-12-02 18:25:27 +0000572 struct sock *child;
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700573 const struct tcphdr *th = tcp_hdr(skb);
Al Viro714e85b2006-11-14 20:51:49 -0800574 __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
Eric Dumazeta2a385d2012-05-16 23:15:34 +0000575 bool paws_reject = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576
David S. Millerbb5b7c12009-12-15 20:56:42 -0800577 tmp_opt.saw_tstamp = 0;
578 if (th->doff > (sizeof(struct tcphdr)>>2)) {
579 tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700580
581 if (tmp_opt.saw_tstamp) {
582 tmp_opt.ts_recent = req->ts_recent;
583 /* We do not store true stamp, but it is not required,
584 * it can be estimated (approximately)
585 * from another data.
586 */
James Morris9d729f72007-03-04 16:12:44 -0800587 tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
Ilpo Järvinenc887e6d2009-03-14 14:23:03 +0000588 paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589 }
590 }
591
592 /* Check for pure retransmitted SYN. */
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700593 if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 flg == TCP_FLAG_SYN &&
595 !paws_reject) {
596 /*
597 * RFC793 draws (Incorrectly! It was fixed in RFC1122)
598 * this case on figure 6 and figure 8, but formal
599 * protocol description says NOTHING.
600 * To be more exact, it says that we should send ACK,
601 * because this segment (at least, if it has no data)
602 * is out of window.
603 *
604 * CONCLUSION: RFC793 (even with RFC1122) DOES NOT
605 * describe SYN-RECV state. All the description
606 * is wrong, we cannot believe to it and should
607 * rely only on common sense and implementation
608 * experience.
609 *
610 * Enforce "SYN-ACK" according to figure 8, figure 6
611 * of RFC793, fixed by RFC1122.
612 */
William Allen Simpsone6b4d112009-12-02 18:07:39 +0000613 req->rsk_ops->rtx_syn_ack(sk, req, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614 return NULL;
615 }
616
617 /* Further reproduces section "SEGMENT ARRIVES"
618 for state SYN-RECEIVED of RFC793.
619 It is broken, however, it does not work only
620 when SYNs are crossed.
621
622 You would think that SYN crossing is impossible here, since
623 we should have a SYN_SENT socket (from connect()) on our end,
624 but this is not true if the crossed SYNs were sent to both
625 ends by a malicious third party. We must defend against this,
626 and to do that we first verify the ACK (as per RFC793, page
627 36) and reset if it is invalid. Is this a true full defense?
628 To convince ourselves, let us consider a way in which the ACK
629 test can still pass in this 'malicious crossed SYNs' case.
630 Malicious sender sends identical SYNs (and thus identical sequence
631 numbers) to both A and B:
632
633 A: gets SYN, seq=7
634 B: gets SYN, seq=7
635
636 By our good fortune, both A and B select the same initial
637 send sequence number of seven :-)
638
639 A: sends SYN|ACK, seq=7, ack_seq=8
640 B: sends SYN|ACK, seq=7, ack_seq=8
641
642 So we are now A eating this SYN|ACK, ACK test passes. So
643 does sequence test, SYN is truncated, and thus we consider
644 it a bare ACK.
645
David S. Millerec0a1962008-06-12 16:31:35 -0700646 If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
647 bare ACK. Otherwise, we create an established connection. Both
648 ends (listening sockets) accept the new incoming connection and try
649 to talk to each other. 8-)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650
651 Note: This case is both harmless, and rare. Possibility is about the
652 same as us discovering intelligent life on another planet tomorrow.
653
654 But generally, we should (RFC lies!) accept the ACK
655 from a SYNACK both here and in tcp_rcv_state_process().
656 tcp_rcv_state_process() does not, hence, neither do we.
657
658 Note that the case is absolutely generic:
659 we cannot optimize anything here without
660 violating protocol. All the checks must be made
661 before attempt to create socket.
662 */
663
664 /* RFC793 page 36: "If the connection is in any non-synchronized state ...
665 * and the incoming segment acknowledges something not yet
Stephen Hemmingercaa20d9a2005-11-10 17:13:47 -0800666 * sent (the segment carries an unacceptable ACK) ...
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667 * a reset is sent."
668 *
669 * Invalid ACK: reset will be sent by listening socket
670 */
671 if ((flg & TCP_FLAG_ACK) &&
William Allen Simpson435cf552009-12-02 18:17:05 +0000672 (TCP_SKB_CB(skb)->ack_seq !=
673 tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk))))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 return sk;
675
676 /* Also, it would be not so bad idea to check rcv_tsecr, which
677 * is essentially ACK extension and too early or too late values
678 * should cause reset in unsynchronized states.
679 */
680
681 /* RFC793: "first check sequence number". */
682
683 if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700684 tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 /* Out of window: send ACK and drop. */
686 if (!(flg & TCP_FLAG_RST))
Gui Jianfeng6edafaa2008-08-06 23:50:04 -0700687 req->rsk_ops->send_ack(sk, skb, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688 if (paws_reject)
Pavel Emelyanovde0744a2008-07-16 20:31:16 -0700689 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690 return NULL;
691 }
692
693 /* In sequence, PAWS is OK. */
694
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700695 if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
Adam Langley2aaab9a2008-08-07 20:27:45 -0700696 req->ts_recent = tmp_opt.rcv_tsval;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697
Adam Langley2aaab9a2008-08-07 20:27:45 -0700698 if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
699 /* Truncate SYN, it is out of window starting
700 at tcp_rsk(req)->rcv_isn + 1. */
701 flg &= ~TCP_FLAG_SYN;
702 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700703
Adam Langley2aaab9a2008-08-07 20:27:45 -0700704 /* RFC793: "second check the RST bit" and
705 * "fourth, check the SYN bit"
706 */
707 if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
708 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
709 goto embryonic_reset;
710 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700711
Adam Langley2aaab9a2008-08-07 20:27:45 -0700712 /* ACK sequence verified above, just make sure ACK is
713 * set. If ACK not set, just silently drop the packet.
714 */
715 if (!(flg & TCP_FLAG_ACK))
716 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717
Julian Anastasovd1b99ba2009-10-19 10:01:56 +0000718 /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
719 if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
Adam Langley2aaab9a2008-08-07 20:27:45 -0700720 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
721 inet_rsk(req)->acked = 1;
Eric Dumazet907cdda2010-03-19 05:37:18 +0000722 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
Adam Langley2aaab9a2008-08-07 20:27:45 -0700723 return NULL;
724 }
Jerry Chu9ad7c042011-06-08 11:08:38 +0000725 if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
726 tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
727 else if (req->retrans) /* don't take RTT sample if retrans && ~TS */
728 tcp_rsk(req)->snt_synack = 0;
David S. Millerec0a1962008-06-12 16:31:35 -0700729
Adam Langley2aaab9a2008-08-07 20:27:45 -0700730 /* OK, ACK is valid, create big socket and
731 * feed this segment to it. It will repeat all
732 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
733 * ESTABLISHED STATE. If it will be dropped after
734 * socket is created, wait for troubles.
735 */
736 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
737 if (child == NULL)
738 goto listen_overflow;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739
Adam Langley2aaab9a2008-08-07 20:27:45 -0700740 inet_csk_reqsk_queue_unlink(sk, req, prev);
741 inet_csk_reqsk_queue_removed(sk, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742
Adam Langley2aaab9a2008-08-07 20:27:45 -0700743 inet_csk_reqsk_queue_add(sk, req, child);
744 return child;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745
Adam Langley2aaab9a2008-08-07 20:27:45 -0700746listen_overflow:
747 if (!sysctl_tcp_abort_on_overflow) {
748 inet_rsk(req)->acked = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700749 return NULL;
Adam Langley2aaab9a2008-08-07 20:27:45 -0700750 }
751
752embryonic_reset:
753 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
754 if (!(flg & TCP_FLAG_RST))
755 req->rsk_ops->send_reset(sk, skb);
756
757 inet_csk_reqsk_queue_drop(sk, req, prev);
758 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000760EXPORT_SYMBOL(tcp_check_req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761
762/*
763 * Queue segment on the new socket if the new socket is active,
764 * otherwise we just shortcircuit this and continue with
765 * the new socket.
766 */
767
768int tcp_child_process(struct sock *parent, struct sock *child,
769 struct sk_buff *skb)
770{
771 int ret = 0;
772 int state = child->sk_state;
773
774 if (!sock_owned_by_user(child)) {
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700775 ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb),
776 skb->len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777 /* Wakeup parent, send SIGIO */
778 if (state == TCP_SYN_RECV && child->sk_state != state)
779 parent->sk_data_ready(parent, 0);
780 } else {
781 /* Alas, it is possible again, because we do lookup
782 * in main socket hash table and lock on listening
783 * socket does not protect us more.
784 */
Zhu Yia3a858f2010-03-04 18:01:47 +0000785 __sk_add_backlog(child, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786 }
787
788 bh_unlock_sock(child);
789 sock_put(child);
790 return ret;
791}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792EXPORT_SYMBOL(tcp_child_process);