/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the TCP module.
 *
 * Version:	@(#)tcp.h	1.0.5	05/23/93
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#ifndef _TCP_H
#define _TCP_H

#define FASTRETRANS_DEBUG 1

#include <linux/list.h>
#include <linux/tcp.h>
#include <linux/bug.h>
#include <linux/slab.h>
#include <linux/cache.h>
#include <linux/percpu.h>
#include <linux/skbuff.h>
#include <linux/dmaengine.h>
#include <linux/crypto.h>
#include <linux/cryptohash.h>
#include <linux/kref.h>

#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
#include <net/inet_hashtables.h>
#include <net/checksum.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/ip.h>
#include <net/tcp_states.h>
#include <net/inet_ecn.h>
#include <net/dst.h>

#include <linux/seq_file.h>
#include <linux/memcontrol.h>

extern struct inet_hashinfo tcp_hashinfo;

extern struct percpu_counter tcp_orphan_count;
extern void tcp_time_wait(struct sock *sk, int state, int timeo);

#define MAX_TCP_HEADER	(128 + MAX_HEADER)
#define MAX_TCP_OPTION_SPACE 40

/*
 * Never offer a window over 32767 without using window scaling. Some
 * poor stacks do signed 16bit maths!
 */
#define MAX_TCP_WINDOW		32767U

/* Offer an initial receive window of 10 mss. */
#define TCP_DEFAULT_INIT_RCVWND	10

/* Minimal accepted MSS. It is (60+60+8) - (20+20). */
#define TCP_MIN_MSS		88U

/* The least MTU to use for probing */
#define TCP_BASE_MSS		512

/* After receiving this amount of duplicate ACKs fast retransmit starts. */
#define TCP_FASTRETRANS_THRESH 3

/* Maximal reordering. */
#define TCP_MAX_REORDERING	127

/* Maximal number of ACKs sent quickly to accelerate slow-start. */
#define TCP_MAX_QUICKACKS	16U

/* urg_data states */
#define TCP_URG_VALID	0x0100
#define TCP_URG_NOTYET	0x0200
#define TCP_URG_READ	0x0400

#define TCP_RETR1	3	/*
				 * This is how many retries it does before it
				 * tries to figure out if the gateway is
				 * down. Minimal RFC value is 3; it corresponds
				 * to ~3sec-8min depending on RTO.
				 */

#define TCP_RETR2	15	/*
				 * This should take at least
				 * 90 minutes to time out.
				 * RFC1122 says that the limit is 100 sec.
				 * 15 is ~13-30min depending on RTO.
				 */

#define TCP_SYN_RETRIES	 5	/* number of times to retry active opening a
				 * connection: ~180sec is RFC minimum	*/

#define TCP_SYNACK_RETRIES 5	/* number of times to retry passive opening a
				 * connection: ~180sec is RFC minimum	*/

#define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT
				  * state, about 60 seconds	*/
#define TCP_FIN_TIMEOUT	TCP_TIMEWAIT_LEN
				 /* BSD style FIN_WAIT2 deadlock breaker.
				  * It used to be 3min, new value is 60sec,
				  * to combine FIN-WAIT-2 timeout with
				  * TIME-WAIT timer.
				  */

#define TCP_DELACK_MAX	((unsigned)(HZ/5))	/* maximal time to delay before sending an ACK */
#if HZ >= 100
#define TCP_DELACK_MIN	((unsigned)(HZ/25))	/* minimal time to delay before sending an ACK */
#define TCP_ATO_MIN	((unsigned)(HZ/25))
#else
#define TCP_DELACK_MIN	4U
#define TCP_ATO_MIN	4U
#endif
#define TCP_RTO_MAX	((unsigned)(120*HZ))
#define TCP_RTO_MIN	((unsigned)(HZ/5))
#define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))	/* RFC2988bis initial RTO value	*/
#define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))	/* RFC 1122 initial RTO value, now
						 * used as a fallback RTO for the
						 * initial data transmission if no
						 * valid RTT sample has been acquired,
						 * most likely due to retrans in 3WHS.
						 */
/* Number of full-sized segments to receive before ACKing (RFC 2581) */
#define TCP_DELACK_SEG	1

#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
							 * for local resources.
							 */

#define TCP_KEEPALIVE_TIME	(120*60*HZ)	/* two hours */
#define TCP_KEEPALIVE_PROBES	9		/* Max of 9 keepalive probes	*/
#define TCP_KEEPALIVE_INTVL	(75*HZ)

#define MAX_TCP_KEEPIDLE	32767
#define MAX_TCP_KEEPINTVL	32767
#define MAX_TCP_KEEPCNT		127
#define MAX_TCP_SYNCNT		127

#define TCP_SYNQ_INTERVAL	(HZ/5)	/* Period of SYNACK timer */

#define TCP_PAWS_24DAYS	(60 * 60 * 24 * 24)
#define TCP_PAWS_MSL	60		/* Per-host timestamps are invalidated
					 * after this time. It should be equal
					 * (or greater than) TCP_TIMEWAIT_LEN
					 * to provide reliability equal to one
					 * provided by timewait state.
					 */
#define TCP_PAWS_WINDOW	1		/* Replay window for per-host
					 * timestamps. It must be less than
					 * minimal timewait lifetime.
					 */
/*
 *	TCP option
 */

#define TCPOPT_NOP		1	/* Padding */
#define TCPOPT_EOL		0	/* End of options */
#define TCPOPT_MSS		2	/* Segment size negotiating */
#define TCPOPT_WINDOW		3	/* Window scaling */
#define TCPOPT_SACK_PERM	4	/* SACK Permitted */
#define TCPOPT_SACK		5	/* SACK Block */
#define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
#define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
#define TCPOPT_COOKIE		253	/* Cookie extension (experimental) */

/*
 *	TCP option lengths
 */

#define TCPOLEN_MSS		4
#define TCPOLEN_WINDOW		3
#define TCPOLEN_SACK_PERM	2
#define TCPOLEN_TIMESTAMP	10
#define TCPOLEN_MD5SIG		18
#define TCPOLEN_COOKIE_BASE	2	/* Cookie-less header extension */
#define TCPOLEN_COOKIE_PAIR	3	/* Cookie pair header extension */
#define TCPOLEN_COOKIE_MIN	(TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN)
#define TCPOLEN_COOKIE_MAX	(TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX)

/* But this is what stacks really send out. */
#define TCPOLEN_TSTAMP_ALIGNED		12
#define TCPOLEN_WSCALE_ALIGNED		4
#define TCPOLEN_SACKPERM_ALIGNED	4
#define TCPOLEN_SACK_BASE		2
#define TCPOLEN_SACK_BASE_ALIGNED	4
#define TCPOLEN_SACK_PERBLOCK		8
#define TCPOLEN_MD5SIG_ALIGNED		20
#define TCPOLEN_MSS_ALIGNED		4

/* Flags in tp->nonagle */
#define TCP_NAGLE_OFF		1	/* Nagle's algo is disabled */
#define TCP_NAGLE_CORK		2	/* Socket is corked	    */
#define TCP_NAGLE_PUSH		4	/* Cork is overridden for already queued data */

/* TCP thin-stream limits */
#define TCP_THIN_LINEAR_RETRIES 6	/* After 6 linear retries, do exp. backoff */

/* TCP initial congestion window as per draft-hkchu-tcpm-initcwnd-01 */
#define TCP_INIT_CWND		10

extern struct inet_timewait_death_row tcp_death_row;

/* sysctl variables for tcp */
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
extern int sysctl_tcp_sack;
extern int sysctl_tcp_fin_timeout;
extern int sysctl_tcp_keepalive_time;
extern int sysctl_tcp_keepalive_probes;
extern int sysctl_tcp_keepalive_intvl;
extern int sysctl_tcp_syn_retries;
extern int sysctl_tcp_synack_retries;
extern int sysctl_tcp_retries1;
extern int sysctl_tcp_retries2;
extern int sysctl_tcp_orphan_retries;
extern int sysctl_tcp_syncookies;
extern int sysctl_tcp_retrans_collapse;
extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_rfc1337;
extern int sysctl_tcp_abort_on_overflow;
extern int sysctl_tcp_max_orphans;
extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering;
extern int sysctl_tcp_ecn;
extern int sysctl_tcp_dsack;
extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale;
extern int sysctl_tcp_tw_reuse;
extern int sysctl_tcp_frto;
extern int sysctl_tcp_frto_response;
extern int sysctl_tcp_low_latency;
extern int sysctl_tcp_dma_copybreak;
extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor;
extern int sysctl_tcp_abc;
extern int sysctl_tcp_mtu_probing;
extern int sysctl_tcp_base_mss;
extern int sysctl_tcp_workaround_signed_windows;
extern int sysctl_tcp_slow_start_after_idle;
extern int sysctl_tcp_max_ssthresh;
extern int sysctl_tcp_cookie_size;
extern int sysctl_tcp_thin_linear_timeouts;
extern int sysctl_tcp_thin_dupack;
extern int sysctl_tcp_challenge_ack_limit;
extern int sysctl_tcp_default_init_rwnd;

extern atomic_long_t tcp_memory_allocated;

/* sysctl variables for controlling various tcp parameters */
extern int sysctl_tcp_delack_seg;
extern int sysctl_tcp_use_userconfig;
extern struct percpu_counter tcp_sockets_allocated;
extern int tcp_memory_pressure;

/*
 * The next routines deal with comparing 32 bit unsigned ints
 * and worry about wraparound (automatic with unsigned arithmetic).
 */

static inline int before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1 - seq2) < 0;
}
#define after(seq2, seq1)	before(seq1, seq2)

/* is s2<=s1<=s3 ? */
static inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
{
	return seq3 - seq2 >= seq1 - seq2;
}

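/*
 * Worked example (illustrative, not from the original source): the signed
 * subtraction in before() stays correct across 32-bit wraparound. With
 * seq1 = 0xfffffff0 and seq2 = 0x00000010, seq2 is logically 0x20 bytes
 * ahead; seq1 - seq2 = 0xffffffe0, which is -32 as __s32, so
 * before(seq1, seq2) holds even though seq1 > seq2 as plain unsigned
 * values. A typical wrap-safe caller-side test (hypothetical sketch):
 *
 *	if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
 *		drop_as_duplicate(skb);	(hypothetical helper)
 */
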
static inline bool tcp_out_of_memory(struct sock *sk)
{
	if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
	    sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2))
		return true;
	return false;
}

static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
{
	struct percpu_counter *ocp = sk->sk_prot->orphan_count;
	int orphans = percpu_counter_read_positive(ocp);

	if (orphans << shift > sysctl_tcp_max_orphans) {
		orphans = percpu_counter_sum_positive(ocp);
		if (orphans << shift > sysctl_tcp_max_orphans)
			return true;
	}
	return false;
}

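/*
 * Note on the check above (explanatory sketch): percpu_counter_read_positive()
 * is a cheap but approximate read, so it serves as a fast filter; only when it
 * crosses the limit do we pay for the exact percpu_counter_sum_positive()
 * before declaring orphan overflow. E.g. with sysctl_tcp_max_orphans = 65536
 * and shift = 1, the effective limit halves and the test fires once more than
 * 32768 orphan sockets remain.
 */
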
extern bool tcp_check_oom(struct sock *sk, int shift);

/* syncookies: remember time of last synqueue overflow */
static inline void tcp_synq_overflow(struct sock *sk)
{
	tcp_sk(sk)->rx_opt.ts_recent_stamp = jiffies;
}

/* syncookies: no recent synqueue overflow on this listening socket? */
static inline int tcp_synq_no_recent_overflow(const struct sock *sk)
{
	unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
	return time_after(jiffies, last_overflow + TCP_TIMEOUT_FALLBACK);
}

extern struct proto tcp_prot;

#define TCP_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.tcp_statistics, field)
#define TCP_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.tcp_statistics, field)
#define TCP_DEC_STATS(net, field)	SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
#define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val)
#define TCP_ADD_STATS(net, field, val)	SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)

extern void tcp_init_mem(struct net *net);

extern void tcp_v4_err(struct sk_buff *skb, u32);

extern void tcp_shutdown(struct sock *sk, int how);

extern int tcp_v4_rcv(struct sk_buff *skb);

extern struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it);
extern void *tcp_v4_tw_get_peer(struct sock *sk);
extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		       size_t size);
extern int tcp_sendpage(struct sock *sk, struct page *page, int offset,
			size_t size, int flags);
extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
				 const struct tcphdr *th, unsigned int len);
extern int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
			       const struct tcphdr *th, unsigned int len);
extern void tcp_rcv_space_adjust(struct sock *sk);
extern void tcp_cleanup_rbuf(struct sock *sk, int copied);
extern int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
extern void tcp_twsk_destructor(struct sock *sk);
extern ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags);
/* sysctl master controller */
extern int tcp_use_userconfig_sysctl_handler(struct ctl_table *, int,
					     void __user *, size_t *, loff_t *);
extern int tcp_proc_delayed_ack_control(struct ctl_table *, int,
					void __user *, size_t *, loff_t *);

static inline void tcp_dec_quickack_mode(struct sock *sk,
					 const unsigned int pkts)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ack.quick) {
		if (pkts >= icsk->icsk_ack.quick) {
			icsk->icsk_ack.quick = 0;
			/* Leaving quickack mode we deflate ATO. */
			icsk->icsk_ack.ato = TCP_ATO_MIN;
		} else
			icsk->icsk_ack.quick -= pkts;
	}
}

#define	TCP_ECN_OK		1
#define	TCP_ECN_QUEUE_CWR	2
#define	TCP_ECN_DEMAND_CWR	4
#define	TCP_ECN_SEEN		8

static __inline__ void
TCP_ECN_create_request(struct request_sock *req, struct tcphdr *th)
{
	if (sysctl_tcp_ecn && th->ece && th->cwr)
		inet_rsk(req)->ecn_ok = 1;
}

enum tcp_tw_status {
	TCP_TW_SUCCESS = 0,
	TCP_TW_RST = 1,
	TCP_TW_ACK = 2,
	TCP_TW_SYN = 3
};


extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
						     struct sk_buff *skb,
						     const struct tcphdr *th);
extern struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct request_sock **prev);
extern int tcp_child_process(struct sock *parent, struct sock *child,
			     struct sk_buff *skb);
extern int tcp_use_frto(struct sock *sk);
extern void tcp_enter_frto(struct sock *sk);
extern void tcp_enter_loss(struct sock *sk, int how);
extern void tcp_clear_retrans(struct tcp_sock *tp);
extern void tcp_update_metrics(struct sock *sk);
extern void tcp_close(struct sock *sk, long timeout);
extern unsigned int tcp_poll(struct file *file, struct socket *sock,
			     struct poll_table_struct *wait);
extern int tcp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen);
extern int tcp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, unsigned int optlen);
extern int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
				 char __user *optval, int __user *optlen);
extern int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
				 char __user *optval, unsigned int optlen);
extern void tcp_set_keepalive(struct sock *sk, int val);
extern void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req);
extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		       size_t len, int nonblock, int flags, int *addr_len);
extern void tcp_parse_options(const struct sk_buff *skb,
			      struct tcp_options_received *opt_rx,
			      const u8 **hvpp, int estab);
extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);

/*
 *	TCP v4 functions exported for the inet6 API
 */

extern void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
extern int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
extern struct sock *tcp_create_openreq_child(struct sock *sk,
					     struct request_sock *req,
					     struct sk_buff *skb);
extern struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst);
extern int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len);
extern int tcp_connect(struct sock *sk);
extern struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
				       struct request_sock *req,
				       struct request_values *rvp);
extern int tcp_disconnect(struct sock *sk, int flags);


/* From syncookies.c */
extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
				    struct ip_options *opt);
#ifdef CONFIG_SYN_COOKIES
extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
				     __u16 *mss);
#else
static inline __u32 cookie_v4_init_sequence(struct sock *sk,
					    struct sk_buff *skb,
					    __u16 *mss)
{
	return 0;
}
#endif

extern __u32 cookie_init_timestamp(struct request_sock *req);
extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *);

/* From net/ipv6/syncookies.c */
extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
#ifdef CONFIG_SYN_COOKIES
extern __u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb,
				     __u16 *mss);
#else
static inline __u32 cookie_v6_init_sequence(struct sock *sk,
					    struct sk_buff *skb,
					    __u16 *mss)
{
	return 0;
}
#endif
/* tcp_output.c */

extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
				      int nonagle);
extern int tcp_may_send_now(struct sock *sk);
extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
extern void tcp_retransmit_timer(struct sock *sk);
extern void tcp_xmit_retransmit_queue(struct sock *);
extern void tcp_simple_retransmit(struct sock *);
extern int tcp_trim_head(struct sock *, struct sk_buff *, u32);
extern int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int);

extern void tcp_send_probe0(struct sock *);
extern void tcp_send_partial(struct sock *);
extern int tcp_write_wakeup(struct sock *);
extern void tcp_send_fin(struct sock *sk);
extern void tcp_send_active_reset(struct sock *sk, gfp_t priority);
extern int tcp_send_synack(struct sock *);
extern int tcp_syn_flood_action(struct sock *sk,
				const struct sk_buff *skb,
				const char *proto);
extern void tcp_push_one(struct sock *, unsigned int mss_now);
extern void tcp_send_ack(struct sock *sk);
extern void tcp_send_delayed_ack(struct sock *sk);

/* tcp_input.c */
extern void tcp_cwnd_application_limited(struct sock *sk);

/* tcp_timer.c */
extern void tcp_init_xmit_timers(struct sock *);
static inline void tcp_clear_xmit_timers(struct sock *sk)
{
	inet_csk_clear_xmit_timers(sk);
}

extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
extern unsigned int tcp_current_mss(struct sock *sk);

/* Bound MSS / TSO packet size with the half of the window */
static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
{
	int cutoff;

	/* When peer uses tiny windows, there is no use in packetizing
	 * to sub-MSS pieces for the sake of SWS or making sure there
	 * are enough packets in the pipe for fast recovery.
	 *
	 * On the other hand, for extremely large MSS devices, handling
	 * smaller than MSS windows in this way does make sense.
	 */
	if (tp->max_window >= 512)
		cutoff = (tp->max_window >> 1);
	else
		cutoff = tp->max_window;

	if (cutoff && pktsize > cutoff)
		return max_t(int, cutoff, 68U - tp->tcp_header_len);
	else
		return pktsize;
}

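/*
 * Worked example for tcp_bound_to_half_wnd() (illustrative only): with
 * tp->max_window = 65535 the cutoff is 32767, so a 48k TSO packet is
 * bounded to 32767 bytes; with a tiny tp->max_window of 256 the cutoff is
 * the whole window and a larger pktsize is clamped to
 * max(256, 68 - tcp_header_len), preserving at least a minimal-MTU payload.
 */
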
/* tcp.c */
extern void tcp_get_info(const struct sock *, struct tcp_info *);

/* Read 'sendfile()'-style from a TCP socket */
typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
			       unsigned int, size_t);
extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
			 sk_read_actor_t recv_actor);

extern void tcp_initialize_rcv_mss(struct sock *sk);

extern int tcp_mtu_to_mss(const struct sock *sk, int pmtu);
extern int tcp_mss_to_mtu(const struct sock *sk, int mss);
extern void tcp_mtup_init(struct sock *sk);
extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt);

static inline void tcp_bound_rto(const struct sock *sk)
{
	if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
		inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
}

static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
{
	return (tp->srtt >> 3) + tp->rttvar;
}

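/*
 * Scaling note for __tcp_set_rto() (hedged; based on the usual Linux
 * conventions rather than anything stated in this header): tp->srtt keeps
 * the smoothed RTT left-shifted by 3, and tp->rttvar carries the 4x-scaled
 * deviation term, so (srtt >> 3) + rttvar amounts to the classic RFC 2988
 * "RTO = SRTT + 4*RTTVAR" in jiffies. tcp_bound_rto() above then caps the
 * result at TCP_RTO_MAX.
 */
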
static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
{
	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
			       ntohl(TCP_FLAG_ACK) |
			       snd_wnd);
}

static inline void tcp_fast_path_on(struct tcp_sock *tp)
{
	__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
}

static inline void tcp_fast_path_check(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (skb_queue_empty(&tp->out_of_order_queue) &&
	    tp->rcv_wnd &&
	    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
	    !tp->urg_data)
		tcp_fast_path_on(tp);
}

/* Compute the actual rto_min value */
static inline u32 tcp_rto_min(struct sock *sk)
{
	const struct dst_entry *dst = __sk_dst_get(sk);
	u32 rto_min = TCP_RTO_MIN;

	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
		rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
	return rto_min;
}

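/*
 * The RTAX_RTO_MIN metric consulted above can be set per destination from
 * userspace, e.g. (illustrative iproute2 invocation, device name assumed):
 *
 *	ip route change 10.0.0.0/24 dev eth0 rto_min 200ms
 *
 * The dst_metric_locked() test is what lets such an explicit per-route
 * setting override the compiled-in TCP_RTO_MIN default.
 */
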
/* Compute the actual receive window we are currently advertising.
 * Rcv_nxt can be after the window if our peer pushes more data
 * than the offered window.
 */
static inline u32 tcp_receive_window(const struct tcp_sock *tp)
{
	s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;

	if (win < 0)
		win = 0;
	return (u32) win;
}

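/*
 * Worked example (illustrative): if rcv_wup = 1000, rcv_wnd = 500 and the
 * peer has pushed data up to rcv_nxt = 1600, then win = 1000 + 500 - 1600
 * = -100; the clamp above reports 0 instead of a huge unsigned value,
 * which is why the intermediate result is kept signed (s32).
 */
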
/* Choose a new window, without checks for shrinking, and without
 * scaling applied to the result. The caller does these things
 * if necessary. This is a "raw" window selection.
 */
extern u32 __tcp_select_window(struct sock *sk);

/* TCP timestamps are only 32-bits, this causes a slight
 * complication on 64-bit systems since we store a snapshot
 * of jiffies in the buffer control blocks below. We decided
 * to use only the low 32-bits of jiffies and hide the ugly
 * casts with the following macro.
 */
#define tcp_time_stamp		((__u32)(jiffies))

#define tcp_flag_byte(th) (((u_int8_t *)th)[13])

#define TCPHDR_FIN 0x01
#define TCPHDR_SYN 0x02
#define TCPHDR_RST 0x04
#define TCPHDR_PSH 0x08
#define TCPHDR_ACK 0x10
#define TCPHDR_URG 0x20
#define TCPHDR_ECE 0x40
#define TCPHDR_CWR 0x80

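/*
 * Example helper (illustrative only, not a kernel API; the name is
 * hypothetical): testing header flags through tcp_flag_byte(). A SYN/ACK
 * has both TCPHDR_SYN and TCPHDR_ACK set, i.e. a flags byte of 0x12.
 */
static inline int tcp_flags_example_is_synack(const struct tcphdr *th)
{
	return (tcp_flag_byte(th) & (TCPHDR_SYN | TCPHDR_ACK)) ==
	       (TCPHDR_SYN | TCPHDR_ACK);
}
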
/* This is what the send packet queuing engine uses to pass
 * TCP per-packet control information to the transmission code.
 * We also store the host-order sequence numbers in here too.
 * This is 44 bytes if IPV6 is enabled.
 * If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately.
 */
struct tcp_skb_cb {
	union {
		struct inet_skb_parm	h4;
#if IS_ENABLED(CONFIG_IPV6)
		struct inet6_skb_parm	h6;
#endif
	} header;	/* For incoming frames		*/
	__u32		seq;		/* Starting sequence number	*/
	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
	__u32		when;		/* used to compute rtt's	*/
	__u8		tcp_flags;	/* TCP header flags. (tcp[13])	*/
	__u8		sacked;		/* State flags for SACK/FACK.	*/
#define TCPCB_SACKED_ACKED	0x01	/* SKB ACK'd by a SACK block	*/
#define TCPCB_SACKED_RETRANS	0x02	/* SKB retransmitted		*/
#define TCPCB_LOST		0x04	/* SKB is lost			*/
#define TCPCB_TAGBITS		0x07	/* All tag bits			*/
	__u8		ip_dsfield;	/* IPv4 tos or IPv6 dsfield	*/
	/* 1 byte hole */
#define TCPCB_EVER_RETRANS	0x80	/* Ever retransmitted frame	*/
#define TCPCB_RETRANS		(TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)

	__u32		ack_seq;	/* Sequence number ACK'd	*/
};

#define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))

/* Due to TSO, an SKB can be composed of multiple actual
 * packets. To keep these tracked properly, we use this.
 */
static inline int tcp_skb_pcount(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_segs;
}

/* This is valid iff tcp_skb_pcount() > 1. */
static inline int tcp_skb_mss(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_size;
}

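/*
 * Worked example (illustrative): a single 4380-byte TSO skb built with
 * gso_size = 1460 has gso_segs = 3, so tcp_skb_pcount() reports three
 * packets on the wire while tcp_skb_mss() reports the 1460-byte
 * per-segment size used for the accounting above.
 */
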
/* Events passed to congestion control interface */
enum tcp_ca_event {
	CA_EVENT_TX_START,	/* first transmit when no packets in flight */
	CA_EVENT_CWND_RESTART,	/* congestion window restart */
	CA_EVENT_COMPLETE_CWR,	/* end of congestion recovery */
	CA_EVENT_FRTO,		/* fast recovery timeout */
	CA_EVENT_LOSS,		/* loss timeout */
	CA_EVENT_FAST_ACK,	/* in sequence ack */
	CA_EVENT_SLOW_ACK,	/* other ack */
};

/*
 * Interface for adding new TCP congestion control handlers
 */
#define TCP_CA_NAME_MAX	16
#define TCP_CA_MAX	128
#define TCP_CA_BUF_MAX	(TCP_CA_NAME_MAX*TCP_CA_MAX)

#define TCP_CONG_NON_RESTRICTED 0x1
#define TCP_CONG_RTT_STAMP	0x2

struct tcp_congestion_ops {
	struct list_head	list;
	unsigned long		flags;

	/* initialize private data (optional) */
	void (*init)(struct sock *sk);
	/* cleanup private data (optional) */
	void (*release)(struct sock *sk);

	/* return slow start threshold (required) */
	u32 (*ssthresh)(struct sock *sk);
	/* lower bound for congestion window (optional) */
	u32 (*min_cwnd)(const struct sock *sk);
	/* do new cwnd calculation (required) */
	void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight);
	/* call before changing ca_state (optional) */
	void (*set_state)(struct sock *sk, u8 new_state);
	/* call when cwnd event occurs (optional) */
	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
	/* new value of cwnd after loss (optional) */
	u32  (*undo_cwnd)(struct sock *sk);
	/* hook for packet ack accounting (optional) */
	void (*pkts_acked)(struct sock *sk, u32 num_acked, s32 rtt_us);
	/* get info for inet_diag (optional) */
	void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);

	char		name[TCP_CA_NAME_MAX];
	struct module	*owner;
};

extern int tcp_register_congestion_control(struct tcp_congestion_ops *type);
extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);

extern void tcp_init_congestion_control(struct sock *sk);
extern void tcp_cleanup_congestion_control(struct sock *sk);
extern int tcp_set_default_congestion_control(const char *name);
extern void tcp_get_default_congestion_control(char *name);
extern void tcp_get_available_congestion_control(char *buf, size_t len);
extern void tcp_get_allowed_congestion_control(char *buf, size_t len);
extern int tcp_set_allowed_congestion_control(char *allowed);
extern int tcp_set_congestion_control(struct sock *sk, const char *name);
extern void tcp_slow_start(struct tcp_sock *tp);
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w);

extern struct tcp_congestion_ops tcp_init_congestion_ops;
extern u32 tcp_reno_ssthresh(struct sock *sk);
extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight);
extern u32 tcp_reno_min_cwnd(const struct sock *sk);
extern struct tcp_congestion_ops tcp_reno;

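/*
 * Minimal sketch of a congestion control module (illustrative only; the
 * name "example" is hypothetical, and a real module would live in its own
 * .c file rather than in this header). Only .ssthresh and .cong_avoid are
 * required by the ops table above; this sketch just reuses the Reno
 * helpers declared above and registers itself from module init code:
 *
 *	static struct tcp_congestion_ops tcp_example = {
 *		.ssthresh	= tcp_reno_ssthresh,
 *		.cong_avoid	= tcp_reno_cong_avoid,
 *		.min_cwnd	= tcp_reno_min_cwnd,
 *		.owner		= THIS_MODULE,
 *		.name		= "example",
 *	};
 *
 *	static int __init tcp_example_init(void)
 *	{
 *		return tcp_register_congestion_control(&tcp_example);
 *	}
 *
 *	static void __exit tcp_example_exit(void)
 *	{
 *		tcp_unregister_congestion_control(&tcp_example);
 *	}
 */
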
static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ca_ops->set_state)
		icsk->icsk_ca_ops->set_state(sk, ca_state);
	icsk->icsk_ca_state = ca_state;
}

static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ca_ops->cwnd_event)
		icsk->icsk_ca_ops->cwnd_event(sk, event);
}

/* These functions determine how the current flow behaves in respect of SACK
 * handling. SACK is negotiated with the peer, and therefore it can vary
 * between different flows.
 *
 * tcp_is_sack - SACK enabled
 * tcp_is_reno - No SACK
 * tcp_is_fack - FACK enabled, implies SACK enabled
 */
static inline int tcp_is_sack(const struct tcp_sock *tp)
{
	return tp->rx_opt.sack_ok;
}

static inline int tcp_is_reno(const struct tcp_sock *tp)
{
	return !tcp_is_sack(tp);
}

static inline int tcp_is_fack(const struct tcp_sock *tp)
{
	return tp->rx_opt.sack_ok & TCP_FACK_ENABLED;
}

static inline void tcp_enable_fack(struct tcp_sock *tp)
{
	tp->rx_opt.sack_ok |= TCP_FACK_ENABLED;
}

static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
{
	return tp->sacked_out + tp->lost_out;
}

/* This determines how many packets are "in the network" to the best
 * of our knowledge. In many cases it is conservative, but where
 * detailed information is available from the receiver (via SACK
 * blocks etc.) we can make more aggressive calculations.
 *
 * Use this for decisions involving congestion control, use just
 * tp->packets_out to determine if the send queue is empty or not.
 *
 * Read this equation as:
 *
 *	"Packets sent once on transmission queue" MINUS
 *	"Packets left network, but not honestly ACKed yet" PLUS
 *	"Packets fast retransmitted"
 */
static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
{
	return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
}

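/*
 * Worked example (illustrative): with packets_out = 10, sacked_out = 3,
 * lost_out = 2 and retrans_out = 1, tcp_left_out() is 5 and
 * tcp_packets_in_flight() is 10 - 5 + 1 = 6 segments still presumed to be
 * in the network.
 */
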
#define TCP_INFINITE_SSTHRESH	0x7fffffff

static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
{
	return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
}

/* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
 * The exception is rate halving phase, when cwnd is decreasing towards
 * ssthresh.
 */
static inline __u32 tcp_current_ssthresh(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
		return tp->snd_ssthresh;
	else
		return max(tp->snd_ssthresh,
			   ((tp->snd_cwnd >> 1) +
			    (tp->snd_cwnd >> 2)));
}

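/*
 * Note (illustrative): (snd_cwnd >> 1) + (snd_cwnd >> 2) above is simply
 * 3/4 of snd_cwnd, so outside CWR/Recovery the reported ssthresh is never
 * lower than three quarters of the current window; e.g. with snd_cwnd = 40
 * and snd_ssthresh = 20, this returns 30.
 */
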
/* Use define here intentionally to get WARN_ON location shown at the caller */
#define tcp_verify_left_out(tp)	WARN_ON(tcp_left_out(tp) > tp->packets_out)

extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
extern __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst);

/* The maximum number of MSS of available cwnd for which TSO defers
 * sending if not using sysctl_tcp_tso_win_divisor.
 */
static inline __u32 tcp_max_tso_deferred_mss(const struct tcp_sock *tp)
{
	return 3;
}

/* Slow start with delack produces 3 packets of burst, so that
 * it is safe "de facto". This will be the default - same as
 * the default reordering threshold - but if reordering increases,
 * we must be able to allow cwnd to burst at least this much in order
 * to not pull it back when holes are filled.
 */
static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp)
{
	return tp->reordering;
}

/* Returns end sequence number of the receiver's advertised window */
static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
{
	return tp->snd_una + tp->snd_wnd;
}
extern int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight);

static inline void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss,
				       const struct sk_buff *skb)
{
	if (skb->len < mss)
		tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
}

static inline void tcp_check_probe_timer(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);

	if (!tp->packets_out && !icsk->icsk_pending)
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
					  icsk->icsk_rto, TCP_RTO_MAX);
}

static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
{
	tp->snd_wl1 = seq;
}

static inline void tcp_update_wl(struct tcp_sock *tp, u32 seq)
{
	tp->snd_wl1 = seq;
}

/*
 * Calculate(/check) TCP checksum
 */
static inline __sum16 tcp_v4_check(int len, __be32 saddr,
				   __be32 daddr, __wsum base)
{
	return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb)
{
	return __skb_checksum_complete(skb);
}

static inline int tcp_checksum_complete(struct sk_buff *skb)
{
	return !skb_csum_unnecessary(skb) &&
		__tcp_checksum_complete(skb);
}

/* Prequeue for VJ style copy to user, combined with checksumming. */

static inline void tcp_prequeue_init(struct tcp_sock *tp)
{
	tp->ucopy.task = NULL;
	tp->ucopy.len = 0;
	tp->ucopy.memory = 0;
	skb_queue_head_init(&tp->ucopy.prequeue);
#ifdef CONFIG_NET_DMA
	tp->ucopy.dma_chan = NULL;
	tp->ucopy.wakeup = 0;
	tp->ucopy.pinned_list = NULL;
	tp->ucopy.dma_cookie = 0;
#endif
}

/* Packet is added to VJ-style prequeue for processing in process
 * context, if a reader task is waiting. Apparently, this exciting
 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 * failed somewhere. Latency? Burstiness? Well, at least now we will
 * see why it failed. 8)8) --ANK
 *
 * NOTE: is this not too big to inline?
 */
static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (sysctl_tcp_low_latency || !tp->ucopy.task)
		return 0;

	skb_dst_force(skb);
	__skb_queue_tail(&tp->ucopy.prequeue, skb);
	tp->ucopy.memory += skb->truesize;
	if (tp->ucopy.memory > sk->sk_rcvbuf) {
		struct sk_buff *skb1;

		BUG_ON(sock_owned_by_user(sk));

		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
			sk_backlog_rcv(sk, skb1);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPPREQUEUEDROPPED);
		}

		tp->ucopy.memory = 0;
	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
		wake_up_interruptible_sync_poll(sk_sleep(sk),
					   POLLIN | POLLRDNORM | POLLRDBAND);
		if (!inet_csk_ack_scheduled(sk))
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  (3 * tcp_rto_min(sk)) / 4,
						  TCP_RTO_MAX);
	}
	return 1;
}


#undef STATE_TRACE

#ifdef STATE_TRACE
static const char *statename[] = {
	"Unused", "Established", "Syn Sent", "Syn Recv",
	"Fin Wait 1", "Fin Wait 2", "Time Wait", "Close",
	"Close Wait", "Last ACK", "Listen", "Closing"
};
#endif
extern void tcp_set_state(struct sock *sk, int state);

extern void tcp_done(struct sock *sk);

static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
{
	rx_opt->dsack = 0;
	rx_opt->num_sacks = 0;
}

/* Determine a window scaling and initial window to offer. */
extern void tcp_select_initial_window(int __space, __u32 mss,
				      __u32 *rcv_wnd, __u32 *window_clamp,
				      int wscale_ok, __u8 *rcv_wscale,
				      __u32 init_rcv_wnd);

static inline int tcp_win_from_space(int space)
{
	return sysctl_tcp_adv_win_scale <= 0 ?
		(space >> (-sysctl_tcp_adv_win_scale)) :
		space - (space >> sysctl_tcp_adv_win_scale);
}

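/*
 * Worked example (illustrative): with sysctl_tcp_adv_win_scale = 2,
 * tcp_win_from_space(65536) = 65536 - 65536/4 = 49152, i.e. a quarter of
 * the receive buffer is reserved for metadata overhead. A non-positive
 * scale reverses the meaning: with -2, only space/4 is offered as window.
 */
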
/* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space(const struct sock *sk)
{
	return tcp_win_from_space(sk->sk_rcvbuf -
				  atomic_read(&sk->sk_rmem_alloc));
}

static inline int tcp_full_space(const struct sock *sk)
{
	return tcp_win_from_space(sk->sk_rcvbuf);
}

static inline void tcp_openreq_init(struct request_sock *req,
				    struct tcp_options_received *rx_opt,
				    struct sk_buff *skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);

	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
	req->cookie_ts = 0;
	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
	req->mss = rx_opt->mss_clamp;
	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
	ireq->tstamp_ok = rx_opt->tstamp_ok;
	ireq->sack_ok = rx_opt->sack_ok;
	ireq->snd_wscale = rx_opt->snd_wscale;
	ireq->wscale_ok = rx_opt->wscale_ok;
	ireq->acked = 0;
	ireq->ecn_ok = 0;
	ireq->rmt_port = tcp_hdr(skb)->source;
	ireq->loc_port = tcp_hdr(skb)->dest;
}

extern void tcp_enter_memory_pressure(struct sock *sk);

static inline int keepalive_intvl_when(const struct tcp_sock *tp)
{
	return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
}

static inline int keepalive_time_when(const struct tcp_sock *tp)
{
	return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
}

static inline int keepalive_probes(const struct tcp_sock *tp)
{
	return tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
}

static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
{
	const struct inet_connection_sock *icsk = &tp->inet_conn;

	return min_t(u32, tcp_time_stamp - icsk->icsk_ack.lrcvtime,
		     tcp_time_stamp - tp->rcv_tstamp);
}

static inline int tcp_fin_time(const struct sock *sk)
{
	int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout;
	const int rto = inet_csk(sk)->icsk_rto;

	if (fin_timeout < (rto << 2) - (rto >> 1))
		fin_timeout = (rto << 2) - (rto >> 1);

	return fin_timeout;
}

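/*
 * Note (illustrative): (rto << 2) - (rto >> 1) is 4*RTO - RTO/2, i.e.
 * 3.5 * RTO, so the FIN_WAIT2 lifetime used above is never shorter than
 * three and a half retransmission timeouts, whatever linger2 or the
 * sysctl request.
 */
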
static inline int tcp_paws_check(const struct tcp_options_received *rx_opt,
				 int paws_win)
{
	if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
		return 1;
	if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
		return 1;
	/*
	 * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0;
	 * the TCP segments that follow then carry valid values.  Treat a
	 * zero ts_recent as "no timestamp seen yet", or else the
	 * 'negative' tsval comparison above could make us reject their
	 * packets.
	 */
	if (!rx_opt->ts_recent)
		return 1;
	return 0;
}

static inline int tcp_paws_reject(const struct tcp_options_received *rx_opt,
				  int rst)
{
	if (tcp_paws_check(rx_opt, 0))
		return 0;

	/* RST segments are not recommended to carry timestamps, and, if
	   they do, it is recommended to ignore PAWS because "their cleanup
	   function should take precedence over timestamps."
	   Certainly, that recommendation is a mistake.  One must understand
	   the reason for this constraint before relaxing it: if the peer
	   reboots, its clock may go out of sync, and half-open connections
	   would then never be reset.
	   Actually, the problem would not exist at all if every
	   implementation followed the draft about maintaining clocks
	   across reboots.  Linux-2.2 DOES NOT!

	   However, we can relax the time bounds for RST segments to MSL.
	 */
	if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
		return 0;
	return 1;
}

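/*
 * Hypothetical validation sketch (callers live in tcp_minisocks.c and
 * tcp_input.c, not here): for an incoming segment whose timestamp
 * option has been parsed, something like
 *
 *	if (tcp_paws_reject(&tmp_opt, th->rst))
 *		drop the segment;
 *
 * implements the PAWS test of RFC 1323, with the MSL relaxation for
 * RSTs described in the comment above.
 */
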
static inline void tcp_mib_init(struct net *net)
{
	/* See RFC 2012 */
	TCP_ADD_STATS_USER(net, TCP_MIB_RTOALGORITHM, 1);
	TCP_ADD_STATS_USER(net, TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ);
	TCP_ADD_STATS_USER(net, TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ);
	TCP_ADD_STATS_USER(net, TCP_MIB_MAXCONN, -1);
}

/* from STCP */
static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp)
{
	tp->lost_skb_hint = NULL;
	tp->scoreboard_skb_hint = NULL;
}

static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp)
{
	tcp_clear_retrans_hints_partial(tp);
	tp->retransmit_skb_hint = NULL;
}

/* MD5 Signature */
struct crypto_hash;

union tcp_md5_addr {
	struct in_addr  a4;
#if IS_ENABLED(CONFIG_IPV6)
	struct in6_addr a6;
#endif
};

/* - key database */
struct tcp_md5sig_key {
	struct hlist_node	node;
	u8			keylen;
	u8			family; /* AF_INET or AF_INET6 */
	union tcp_md5_addr	addr;
	u8			key[TCP_MD5SIG_MAXKEYLEN];
	struct rcu_head		rcu;
};

/* - sock block */
struct tcp_md5sig_info {
	struct hlist_head	head;
	struct rcu_head		rcu;
};

/* - pseudo header */
struct tcp4_pseudohdr {
	__be32		saddr;
	__be32		daddr;
	__u8		pad;
	__u8		protocol;
	__be16		len;
};

struct tcp6_pseudohdr {
	struct in6_addr	saddr;
	struct in6_addr	daddr;
	__be32		len;
	__be32		protocol;	/* including padding */
};

union tcp_md5sum_block {
	struct tcp4_pseudohdr ip4;
#if IS_ENABLED(CONFIG_IPV6)
	struct tcp6_pseudohdr ip6;
#endif
};

/* - pool: digest algorithm, hash description and scratch buffer */
struct tcp_md5sig_pool {
	struct hash_desc	md5_desc;
	union tcp_md5sum_block	md5_blk;
};

/* - functions */
extern int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct request_sock *req,
			       const struct sk_buff *skb);
extern int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
			  int family, const u8 *newkey,
			  u8 newkeylen, gfp_t gfp);
extern int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
			  int family);
extern struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
						struct sock *addr_sk);

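/*
 * Illustrative sketch (an assumption about the setsockopt path in
 * tcp_ipv4.c): installing a key for an IPv4 peer boils down to
 *
 *	tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
 *		       AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
 *
 * and removal to a matching tcp_md5_do_del(sk, addr, AF_INET).
 */
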
#ifdef CONFIG_TCP_MD5SIG
extern struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
			const union tcp_md5_addr *addr, int family);
#define tcp_twsk_md5_key(twsk)	((twsk)->tw_md5_key)
#else
static inline struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	return NULL;
}
#define tcp_twsk_md5_key(twsk)	NULL
#endif

extern struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *);
extern void tcp_free_md5sig_pool(void);

extern struct tcp_md5sig_pool	*tcp_get_md5sig_pool(void);
extern void tcp_put_md5sig_pool(void);

extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, const struct tcphdr *);
extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *,
				 unsigned header_len);
extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
			    const struct tcp_md5sig_key *key);

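/*
 * Abbreviated usage sketch for the per-cpu pool (hedged; the canonical
 * caller is tcp_v4_md5_hash_skb() and its IPv6 twin, which also init
 * and finalize the crypto descriptor): hashing brackets the work with
 * get/put, e.g.
 *
 *	struct tcp_md5sig_pool *hp = tcp_get_md5sig_pool();
 *	if (!hp)
 *		return 1;
 *	if (tcp_md5_hash_header(hp, th) ||
 *	    tcp_md5_hash_skb_data(hp, skb, th->doff << 2) ||
 *	    tcp_md5_hash_key(hp, key))
 *		... error ...
 *	tcp_put_md5sig_pool();
 */
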
/* write queue abstraction */
static inline void tcp_write_queue_purge(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
		sk_wmem_free_skb(sk, skb);
	sk_mem_reclaim(sk);
	tcp_clear_all_retrans_hints(tcp_sk(sk));
}

static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
{
	return skb_peek(&sk->sk_write_queue);
}

static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk)
{
	return skb_peek_tail(&sk->sk_write_queue);
}

static inline struct sk_buff *tcp_write_queue_next(const struct sock *sk,
						   const struct sk_buff *skb)
{
	return skb_queue_next(&sk->sk_write_queue, skb);
}

static inline struct sk_buff *tcp_write_queue_prev(const struct sock *sk,
						   const struct sk_buff *skb)
{
	return skb_queue_prev(&sk->sk_write_queue, skb);
}

#define tcp_for_write_queue(skb, sk)					\
	skb_queue_walk(&(sk)->sk_write_queue, skb)

#define tcp_for_write_queue_from(skb, sk)				\
	skb_queue_walk_from(&(sk)->sk_write_queue, skb)

#define tcp_for_write_queue_from_safe(skb, tmp, sk)			\
	skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)

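/*
 * Walk sketch, shown here for orientation: the macros above cover the
 * whole write queue, so callers interested only in data that has
 * already been transmitted stop at the send head, e.g.
 *
 *	tcp_for_write_queue(skb, sk) {
 *		if (skb == tcp_send_head(sk))
 *			break;
 *		... inspect an already-sent skb ...
 *	}
 */
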
static inline struct sk_buff *tcp_send_head(const struct sock *sk)
{
	return sk->sk_send_head;
}

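/*
 * sk_send_head marks the first not-yet-transmitted skb on the write
 * queue; NULL means everything queued has been sent at least once.
 * The helpers below maintain that invariant as skbs are sent,
 * unlinked, or inserted.
 */
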
static inline bool tcp_skb_is_last(const struct sock *sk,
				   const struct sk_buff *skb)
{
	return skb_queue_is_last(&sk->sk_write_queue, skb);
}

static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *skb)
{
	if (tcp_skb_is_last(sk, skb))
		sk->sk_send_head = NULL;
	else
		sk->sk_send_head = tcp_write_queue_next(sk, skb);
}

static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
{
	if (sk->sk_send_head == skb_unlinked)
		sk->sk_send_head = NULL;
}

static inline void tcp_init_send_head(struct sock *sk)
{
	sk->sk_send_head = NULL;
}

static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
{
	__skb_queue_tail(&sk->sk_write_queue, skb);
}

static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
{
	__tcp_add_write_queue_tail(sk, skb);

	/* Queue it, remembering where we must start sending. */
	if (sk->sk_send_head == NULL) {
		sk->sk_send_head = skb;

		if (tcp_sk(sk)->highest_sack == NULL)
			tcp_sk(sk)->highest_sack = skb;
	}
}

static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb)
{
	__skb_queue_head(&sk->sk_write_queue, skb);
}

/* Insert buff after skb on the write queue of sk. */
static inline void tcp_insert_write_queue_after(struct sk_buff *skb,
						struct sk_buff *buff,
						struct sock *sk)
{
	__skb_queue_after(&sk->sk_write_queue, skb, buff);
}

/* Insert new before skb on the write queue of sk. */
static inline void tcp_insert_write_queue_before(struct sk_buff *new,
						 struct sk_buff *skb,
						 struct sock *sk)
{
	__skb_queue_before(&sk->sk_write_queue, skb, new);

	if (sk->sk_send_head == skb)
		sk->sk_send_head = new;
}

static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
{
	__skb_unlink(skb, &sk->sk_write_queue);
}

static inline int tcp_write_queue_empty(struct sock *sk)
{
	return skb_queue_empty(&sk->sk_write_queue);
}

static inline void tcp_push_pending_frames(struct sock *sk)
{
	if (tcp_send_head(sk)) {
		struct tcp_sock *tp = tcp_sk(sk);

		__tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle);
	}
}

/* Start sequence of the skb just after the highest SACKed skb.  Valid
 * only if sacked_out > 0 or when the caller has itself ensured
 * validity.
 */
static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
{
	if (!tp->sacked_out)
		return tp->snd_una;

	if (tp->highest_sack == NULL)
		return tp->snd_nxt;

	return TCP_SKB_CB(tp->highest_sack)->seq;
}

static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb)
{
	tcp_sk(sk)->highest_sack = tcp_skb_is_last(sk, skb) ? NULL :
						tcp_write_queue_next(sk, skb);
}

static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
{
	return tcp_sk(sk)->highest_sack;
}

static inline void tcp_highest_sack_reset(struct sock *sk)
{
	tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk);
}

/* Called when old skb is about to be deleted (to be combined with new skb) */
static inline void tcp_highest_sack_combine(struct sock *sk,
					    struct sk_buff *old,
					    struct sk_buff *new)
{
	if (tcp_sk(sk)->sacked_out && (old == tcp_sk(sk)->highest_sack))
		tcp_sk(sk)->highest_sack = new;
}

/* Determines whether this is a thin stream (which may suffer from
 * increased latency).  Used to trigger latency-reducing mechanisms.
 */
static inline unsigned int tcp_stream_is_thin(struct tcp_sock *tp)
{
	return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp);
}

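/*
 * Example: with packets_out == 2 outside initial slow start, the
 * stream is "thin" -- too few segments in flight to ever collect the
 * three duplicate ACKs needed for fast retransmit -- so mechanisms
 * such as the tcp_thin_linear_timeouts sysctl may apply gentler
 * retransmission behaviour.
 */
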
/* /proc */
enum tcp_seq_states {
	TCP_SEQ_STATE_LISTENING,
	TCP_SEQ_STATE_OPENREQ,
	TCP_SEQ_STATE_ESTABLISHED,
	TCP_SEQ_STATE_TIME_WAIT,
};

int tcp_seq_open(struct inode *inode, struct file *file);

struct tcp_seq_afinfo {
	char				*name;
	sa_family_t			family;
	const struct file_operations	*seq_fops;
	struct seq_operations		seq_ops;
};

struct tcp_iter_state {
	struct seq_net_private	p;
	sa_family_t		family;
	enum tcp_seq_states	state;
	struct sock		*syn_wait_sk;
	int			bucket, offset, sbucket, num, uid;
	loff_t			last_pos;
};

extern int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo);
extern void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo);

extern struct request_sock_ops tcp_request_sock_ops;
extern struct request_sock_ops tcp6_request_sock_ops;

extern void tcp_v4_destroy_sock(struct sock *sk);

extern int tcp_v4_gso_send_check(struct sk_buff *skb);
extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
				       netdev_features_t features);
extern struct sk_buff **tcp_gro_receive(struct sk_buff **head,
					struct sk_buff *skb);
extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head,
					 struct sk_buff *skb);
extern int tcp_gro_complete(struct sk_buff *skb);
extern int tcp4_gro_complete(struct sk_buff *skb);

extern int tcp_nuke_addr(struct net *net, struct sockaddr *addr);

#ifdef CONFIG_PROC_FS
extern int tcp4_proc_init(void);
extern void tcp4_proc_exit(void);
#endif

/* TCP af-specific functions */
struct tcp_sock_af_ops {
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key	*(*md5_lookup) (struct sock *sk,
						struct sock *addr_sk);
	int			(*calc_md5_hash) (char *location,
						  struct tcp_md5sig_key *md5,
						  const struct sock *sk,
						  const struct request_sock *req,
						  const struct sk_buff *skb);
	int			(*md5_parse) (struct sock *sk,
					      char __user *optval,
					      int optlen);
#endif
};

struct tcp_request_sock_ops {
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key	*(*md5_lookup) (struct sock *sk,
						struct request_sock *req);
	int			(*calc_md5_hash) (char *location,
						  struct tcp_md5sig_key *md5,
						  const struct sock *sk,
						  const struct request_sock *req,
						  const struct sk_buff *skb);
#endif
};

/* Using SHA1 for now, define some constants.
 */
#define COOKIE_DIGEST_WORDS (SHA_DIGEST_WORDS)
#define COOKIE_MESSAGE_WORDS (SHA_MESSAGE_BYTES / 4)
#define COOKIE_WORKSPACE_WORDS (COOKIE_DIGEST_WORDS + COOKIE_MESSAGE_WORDS)

extern int tcp_cookie_generator(u32 *bakery);

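/*
 * Hedged usage sketch: callers hand tcp_cookie_generator() a
 * COOKIE_WORKSPACE_WORDS-sized u32 array to be (re)filled with the
 * current secret, e.g. the cookie_bakery[] member of struct
 * tcp_extend_values defined further below:
 *
 *	tcp_cookie_generator(&tmp_ext.cookie_bakery[0]);
 *
 * (tmp_ext here is an illustrative local of type struct
 * tcp_extend_values, not a name this header defines.)
 */
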
/**
 * struct tcp_cookie_values - each socket needs extra space for the
 * cookies, together with (optional) space for any SYN data.
 *
 * A tcp_sock contains a pointer to the current value, and this is
 * cloned to the tcp_timewait_sock.
 *
 * @cookie_pair:	variable data from the option exchange.
 *
 * @cookie_desired:	user specified tcpct_cookie_desired.  Zero
 *			indicates default (sysctl_tcp_cookie_size).
 *			After cookie sent, remembers size of cookie.
 *			Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX.
 *
 * @s_data_desired:	user specified tcpct_s_data_desired.  When the
 *			constant payload is specified (@s_data_constant),
 *			holds its length instead.
 *			Range 0 to TCP_MSS_DESIRED.
 *
 * @s_data_payload:	constant data that is to be included in the
 *			payload of SYN or SYNACK segments when the
 *			cookie option is present.
 */
struct tcp_cookie_values {
	struct kref	kref;
	u8		cookie_pair[TCP_COOKIE_PAIR_SIZE];
	u8		cookie_pair_size;
	u8		cookie_desired;
	u16		s_data_desired:11,
			s_data_constant:1,
			s_data_in:1,
			s_data_out:1,
			s_data_unused:2;
	u8		s_data_payload[0];
};

static inline void tcp_cookie_values_release(struct kref *kref)
{
	kfree(container_of(kref, struct tcp_cookie_values, kref));
}

1555
1556/* The length of constant payload data. Note that s_data_desired is
1557 * overloaded, depending on s_data_constant: either the length of constant
1558 * data (returned here) or the limit on variable data.
1559 */
1560static inline int tcp_s_data_size(const struct tcp_sock *tp)
1561{
1562 return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant)
1563 ? tp->cookie_values->s_data_desired
1564 : 0;
1565}
1566
/**
 * struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace.
 *
 * As tcp_request_sock has already been extended in other places, the
 * only remaining method is to pass stack values along as function
 * parameters.  These parameters are not needed after sending SYNACK.
 *
 * @cookie_bakery:	cryptographic secret and message workspace.
 *
 * @cookie_plus:	bytes in authenticator/cookie option, copied from
 *			struct tcp_options_received (above).
 */
struct tcp_extend_values {
	struct request_values		rv;
	u32				cookie_bakery[COOKIE_WORKSPACE_WORDS];
	u8				cookie_plus:6,
					cookie_out_never:1,
					cookie_in_always:1;
};

static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp)
{
	return (struct tcp_extend_values *)rvp;
}

extern void tcp_v4_init(void);
extern void tcp_init(void);

#endif	/* _TCP_H */