| Vlad Yasevich | 60c778b | 2008-01-11 09:57:09 -0500 | [diff] [blame] | 1 | /* SCTP kernel implementation | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2 |  * Copyright (c) 1999-2000 Cisco, Inc. | 
 | 3 |  * Copyright (c) 1999-2001 Motorola, Inc. | 
 | 4 |  * Copyright (c) 2001-2003 International Business Machines Corp. | 
 | 5 |  * Copyright (c) 2001 Intel Corp. | 
 | 6 |  * Copyright (c) 2001 La Monte H.P. Yarroll | 
 | 7 |  * | 
| Vlad Yasevich | 60c778b | 2008-01-11 09:57:09 -0500 | [diff] [blame] | 8 |  * This file is part of the SCTP kernel implementation | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 9 |  * | 
 | 10 |  * This module provides the abstraction for an SCTP tranport representing | 
 | 11 |  * a remote transport address.  For local transport addresses, we just use | 
 | 12 |  * union sctp_addr. | 
 | 13 |  * | 
| Vlad Yasevich | 60c778b | 2008-01-11 09:57:09 -0500 | [diff] [blame] | 14 |  * This SCTP implementation is free software; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 15 |  * you can redistribute it and/or modify it under the terms of | 
 | 16 |  * the GNU General Public License as published by | 
 | 17 |  * the Free Software Foundation; either version 2, or (at your option) | 
 | 18 |  * any later version. | 
 | 19 |  * | 
| Vlad Yasevich | 60c778b | 2008-01-11 09:57:09 -0500 | [diff] [blame] | 20 |  * This SCTP implementation is distributed in the hope that it | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 21 |  * will be useful, but WITHOUT ANY WARRANTY; without even the implied | 
 | 22 |  *                 ************************ | 
 | 23 |  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | 
 | 24 |  * See the GNU General Public License for more details. | 
 | 25 |  * | 
 | 26 |  * You should have received a copy of the GNU General Public License | 
 | 27 |  * along with GNU CC; see the file COPYING.  If not, write to | 
 | 28 |  * the Free Software Foundation, 59 Temple Place - Suite 330, | 
 | 29 |  * Boston, MA 02111-1307, USA. | 
 | 30 |  * | 
 | 31 |  * Please send any bug reports or fixes you make to the | 
 | 32 |  * email address(es): | 
 | 33 |  *    lksctp developers <lksctp-developers@lists.sourceforge.net> | 
 | 34 |  * | 
 | 35 |  * Or submit a bug report through the following website: | 
 | 36 |  *    http://www.sf.net/projects/lksctp | 
 | 37 |  * | 
 | 38 |  * Written or modified by: | 
 | 39 |  *    La Monte H.P. Yarroll <piggy@acm.org> | 
 | 40 |  *    Karl Knutson          <karl@athena.chicago.il.us> | 
 | 41 |  *    Jon Grimm             <jgrimm@us.ibm.com> | 
 | 42 |  *    Xingang Guo           <xingang.guo@intel.com> | 
 | 43 |  *    Hui Huang             <hui.huang@nokia.com> | 
 | 44 |  *    Sridhar Samudrala	    <sri@us.ibm.com> | 
 | 45 |  *    Ardelle Fan	    <ardelle.fan@intel.com> | 
 | 46 |  * | 
 | 47 |  * Any bugs reported given to us we will try to fix... any fixes shared will | 
 | 48 |  * be incorporated into the next SCTP release. | 
 | 49 |  */ | 
 | 50 |  | 
 | 51 | #include <linux/types.h> | 
| Sridhar Samudrala | ad8fec1 | 2006-07-21 14:48:50 -0700 | [diff] [blame] | 52 | #include <linux/random.h> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 53 | #include <net/sctp/sctp.h> | 
 | 54 | #include <net/sctp/sm.h> | 
 | 55 |  | 
 | 56 | /* 1st Level Abstractions.  */ | 
 | 57 |  | 
 | 58 | /* Initialize a new transport from provided memory.  */ | 
 | 59 | static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, | 
 | 60 | 						  const union sctp_addr *addr, | 
| Al Viro | dd0fc66 | 2005-10-07 07:46:04 +0100 | [diff] [blame] | 61 | 						  gfp_t gfp) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 62 | { | 
 | 63 | 	/* Copy in the address.  */ | 
 | 64 | 	peer->ipaddr = *addr; | 
 | 65 | 	peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); | 
 | 66 | 	peer->asoc = NULL; | 
 | 67 |  | 
 | 68 | 	peer->dst = NULL; | 
 | 69 | 	memset(&peer->saddr, 0, sizeof(union sctp_addr)); | 
 | 70 |  | 
 | 71 | 	/* From 6.3.1 RTO Calculation: | 
 | 72 | 	 * | 
 | 73 | 	 * C1) Until an RTT measurement has been made for a packet sent to the | 
 | 74 | 	 * given destination transport address, set RTO to the protocol | 
 | 75 | 	 * parameter 'RTO.Initial'. | 
 | 76 | 	 */ | 
| Vlad Yasevich | b6157d8 | 2007-10-24 15:59:16 -0400 | [diff] [blame] | 77 | 	peer->last_rto = peer->rto = msecs_to_jiffies(sctp_rto_initial); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 78 | 	peer->rtt = 0; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 79 | 	peer->rttvar = 0; | 
 | 80 | 	peer->srtt = 0; | 
 | 81 | 	peer->rto_pending = 0; | 
| Vlad Yasevich | a646523 | 2008-06-04 12:38:43 -0700 | [diff] [blame] | 82 | 	peer->fast_recovery = 0; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 83 |  | 
 | 84 | 	peer->last_time_heard = jiffies; | 
 | 85 | 	peer->last_time_used = jiffies; | 
 | 86 | 	peer->last_time_ecne_reduced = jiffies; | 
 | 87 |  | 
| Frank Filz | 3f7a87d | 2005-06-20 13:14:57 -0700 | [diff] [blame] | 88 | 	peer->init_sent_count = 0; | 
 | 89 |  | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 90 | 	peer->param_flags = SPP_HB_DISABLE | | 
 | 91 | 			    SPP_PMTUD_ENABLE | | 
 | 92 | 			    SPP_SACKDELAY_ENABLE; | 
 | 93 | 	peer->hbinterval  = 0; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 94 |  | 
 | 95 | 	/* Initialize the default path max_retrans.  */ | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 96 | 	peer->pathmaxrxt  = sctp_max_retrans_path; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 97 | 	peer->error_count = 0; | 
 | 98 |  | 
 | 99 | 	INIT_LIST_HEAD(&peer->transmitted); | 
 | 100 | 	INIT_LIST_HEAD(&peer->send_ready); | 
 | 101 | 	INIT_LIST_HEAD(&peer->transports); | 
 | 102 |  | 
| Florian Westphal | 6d0ccba | 2008-07-18 23:04:39 -0700 | [diff] [blame] | 103 | 	peer->T3_rtx_timer.expires = 0; | 
 | 104 | 	peer->hb_timer.expires = 0; | 
 | 105 |  | 
| Pavel Emelyanov | b24b8a2 | 2008-01-23 21:20:07 -0800 | [diff] [blame] | 106 | 	setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, | 
 | 107 | 			(unsigned long)peer); | 
 | 108 | 	setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event, | 
 | 109 | 			(unsigned long)peer); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 110 |  | 
| Sridhar Samudrala | ad8fec1 | 2006-07-21 14:48:50 -0700 | [diff] [blame] | 111 | 	/* Initialize the 64-bit random nonce sent with heartbeat. */ | 
 | 112 | 	get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); | 
 | 113 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 114 | 	atomic_set(&peer->refcnt, 1); | 
 | 115 | 	peer->dead = 0; | 
 | 116 |  | 
 | 117 | 	peer->malloced = 0; | 
 | 118 |  | 
 | 119 | 	/* Initialize the state information for SFR-CACC */ | 
 | 120 | 	peer->cacc.changeover_active = 0; | 
 | 121 | 	peer->cacc.cycling_changeover = 0; | 
 | 122 | 	peer->cacc.next_tsn_at_change = 0; | 
 | 123 | 	peer->cacc.cacc_saw_newack = 0; | 
 | 124 |  | 
 | 125 | 	return peer; | 
 | 126 | } | 
 | 127 |  | 
 | 128 | /* Allocate and initialize a new transport.  */ | 
| Alexey Dobriyan | 3182cd8 | 2005-07-11 20:57:47 -0700 | [diff] [blame] | 129 | struct sctp_transport *sctp_transport_new(const union sctp_addr *addr, | 
| Al Viro | dd0fc66 | 2005-10-07 07:46:04 +0100 | [diff] [blame] | 130 | 					  gfp_t gfp) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 131 | { | 
| YOSHIFUJI Hideaki | d808ad9 | 2007-02-09 23:25:18 +0900 | [diff] [blame] | 132 | 	struct sctp_transport *transport; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 133 |  | 
| YOSHIFUJI Hideaki | d808ad9 | 2007-02-09 23:25:18 +0900 | [diff] [blame] | 134 | 	transport = t_new(struct sctp_transport, gfp); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 135 | 	if (!transport) | 
 | 136 | 		goto fail; | 
 | 137 |  | 
 | 138 | 	if (!sctp_transport_init(transport, addr, gfp)) | 
 | 139 | 		goto fail_init; | 
 | 140 |  | 
 | 141 | 	transport->malloced = 1; | 
 | 142 | 	SCTP_DBG_OBJCNT_INC(transport); | 
 | 143 |  | 
 | 144 | 	return transport; | 
 | 145 |  | 
 | 146 | fail_init: | 
 | 147 | 	kfree(transport); | 
 | 148 |  | 
 | 149 | fail: | 
 | 150 | 	return NULL; | 
 | 151 | } | 
 | 152 |  | 
 | 153 | /* This transport is no longer needed.  Free up if possible, or | 
 | 154 |  * delay until it last reference count. | 
 | 155 |  */ | 
 | 156 | void sctp_transport_free(struct sctp_transport *transport) | 
 | 157 | { | 
 | 158 | 	transport->dead = 1; | 
 | 159 |  | 
 | 160 | 	/* Try to delete the heartbeat timer.  */ | 
 | 161 | 	if (del_timer(&transport->hb_timer)) | 
 | 162 | 		sctp_transport_put(transport); | 
 | 163 |  | 
 | 164 | 	/* Delete the T3_rtx timer if it's active. | 
 | 165 | 	 * There is no point in not doing this now and letting | 
 | 166 | 	 * structure hang around in memory since we know | 
 | 167 | 	 * the tranport is going away. | 
 | 168 | 	 */ | 
 | 169 | 	if (timer_pending(&transport->T3_rtx_timer) && | 
 | 170 | 	    del_timer(&transport->T3_rtx_timer)) | 
 | 171 | 		sctp_transport_put(transport); | 
 | 172 |  | 
 | 173 |  | 
 | 174 | 	sctp_transport_put(transport); | 
 | 175 | } | 
 | 176 |  | 
 | 177 | /* Destroy the transport data structure. | 
 | 178 |  * Assumes there are no more users of this structure. | 
 | 179 |  */ | 
 | 180 | static void sctp_transport_destroy(struct sctp_transport *transport) | 
 | 181 | { | 
 | 182 | 	SCTP_ASSERT(transport->dead, "Transport is not dead", return); | 
 | 183 |  | 
 | 184 | 	if (transport->asoc) | 
 | 185 | 		sctp_association_put(transport->asoc); | 
 | 186 |  | 
| YOSHIFUJI Hideaki | d808ad9 | 2007-02-09 23:25:18 +0900 | [diff] [blame] | 187 | 	sctp_packet_free(&transport->packet); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 188 |  | 
 | 189 | 	dst_release(transport->dst); | 
 | 190 | 	kfree(transport); | 
 | 191 | 	SCTP_DBG_OBJCNT_DEC(transport); | 
 | 192 | } | 
 | 193 |  | 
 | 194 | /* Start T3_rtx timer if it is not already running and update the heartbeat | 
 | 195 |  * timer.  This routine is called every time a DATA chunk is sent. | 
 | 196 |  */ | 
| Vlad Yasevich | 62aeaff | 2008-06-04 12:39:11 -0700 | [diff] [blame] | 197 | void sctp_transport_reset_timers(struct sctp_transport *transport, int force) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 198 | { | 
 | 199 | 	/* RFC 2960 6.3.2 Retransmission Timer Rules | 
 | 200 | 	 * | 
 | 201 | 	 * R1) Every time a DATA chunk is sent to any address(including a | 
 | 202 | 	 * retransmission), if the T3-rtx timer of that address is not running | 
 | 203 | 	 * start it running so that it will expire after the RTO of that | 
 | 204 | 	 * address. | 
 | 205 | 	 */ | 
 | 206 |  | 
| Vlad Yasevich | 62aeaff | 2008-06-04 12:39:11 -0700 | [diff] [blame] | 207 | 	if (force || !timer_pending(&transport->T3_rtx_timer)) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 208 | 		if (!mod_timer(&transport->T3_rtx_timer, | 
 | 209 | 			       jiffies + transport->rto)) | 
 | 210 | 			sctp_transport_hold(transport); | 
 | 211 |  | 
 | 212 | 	/* When a data chunk is sent, reset the heartbeat interval.  */ | 
 | 213 | 	if (!mod_timer(&transport->hb_timer, | 
 | 214 | 		       sctp_transport_timeout(transport))) | 
 | 215 | 	    sctp_transport_hold(transport); | 
 | 216 | } | 
 | 217 |  | 
 | 218 | /* This transport has been assigned to an association. | 
 | 219 |  * Initialize fields from the association or from the sock itself. | 
 | 220 |  * Register the reference count in the association. | 
 | 221 |  */ | 
 | 222 | void sctp_transport_set_owner(struct sctp_transport *transport, | 
 | 223 | 			      struct sctp_association *asoc) | 
 | 224 | { | 
 | 225 | 	transport->asoc = asoc; | 
 | 226 | 	sctp_association_hold(asoc); | 
 | 227 | } | 
 | 228 |  | 
 | 229 | /* Initialize the pmtu of a transport. */ | 
 | 230 | void sctp_transport_pmtu(struct sctp_transport *transport) | 
 | 231 | { | 
 | 232 | 	struct dst_entry *dst; | 
 | 233 |  | 
 | 234 | 	dst = transport->af_specific->get_dst(NULL, &transport->ipaddr, NULL); | 
 | 235 |  | 
 | 236 | 	if (dst) { | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 237 | 		transport->pathmtu = dst_mtu(dst); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 238 | 		dst_release(dst); | 
 | 239 | 	} else | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 240 | 		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 241 | } | 
 | 242 |  | 
| Vlad Yasevich | c910b47 | 2007-06-07 13:47:03 -0400 | [diff] [blame] | 243 | /* this is a complete rip-off from __sk_dst_check | 
 | 244 |  * the cookie is always 0 since this is how it's used in the | 
 | 245 |  * pmtu code | 
 | 246 |  */ | 
 | 247 | static struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t) | 
 | 248 | { | 
 | 249 | 	struct dst_entry *dst = t->dst; | 
 | 250 |  | 
 | 251 | 	if (dst && dst->obsolete && dst->ops->check(dst, 0) == NULL) { | 
 | 252 | 		dst_release(t->dst); | 
 | 253 | 		t->dst = NULL; | 
 | 254 | 		return NULL; | 
 | 255 | 	} | 
 | 256 |  | 
 | 257 | 	return dst; | 
 | 258 | } | 
 | 259 |  | 
 | 260 | void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) | 
 | 261 | { | 
 | 262 | 	struct dst_entry *dst; | 
 | 263 |  | 
 | 264 | 	if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { | 
 | 265 | 		printk(KERN_WARNING "%s: Reported pmtu %d too low, " | 
 | 266 | 		       "using default minimum of %d\n", | 
| Harvey Harrison | 0dc4787 | 2008-03-05 20:47:47 -0800 | [diff] [blame] | 267 | 		       __func__, pmtu, | 
| Vlad Yasevich | c910b47 | 2007-06-07 13:47:03 -0400 | [diff] [blame] | 268 | 		       SCTP_DEFAULT_MINSEGMENT); | 
 | 269 | 		/* Use default minimum segment size and disable | 
 | 270 | 		 * pmtu discovery on this transport. | 
 | 271 | 		 */ | 
 | 272 | 		t->pathmtu = SCTP_DEFAULT_MINSEGMENT; | 
| Vlad Yasevich | c910b47 | 2007-06-07 13:47:03 -0400 | [diff] [blame] | 273 | 	} else { | 
 | 274 | 		t->pathmtu = pmtu; | 
 | 275 | 	} | 
 | 276 |  | 
 | 277 | 	dst = sctp_transport_dst_check(t); | 
 | 278 | 	if (dst) | 
 | 279 | 		dst->ops->update_pmtu(dst, pmtu); | 
 | 280 | } | 
 | 281 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 282 | /* Caches the dst entry and source address for a transport's destination | 
 | 283 |  * address. | 
 | 284 |  */ | 
 | 285 | void sctp_transport_route(struct sctp_transport *transport, | 
 | 286 | 			  union sctp_addr *saddr, struct sctp_sock *opt) | 
 | 287 | { | 
 | 288 | 	struct sctp_association *asoc = transport->asoc; | 
 | 289 | 	struct sctp_af *af = transport->af_specific; | 
 | 290 | 	union sctp_addr *daddr = &transport->ipaddr; | 
 | 291 | 	struct dst_entry *dst; | 
 | 292 |  | 
 | 293 | 	dst = af->get_dst(asoc, daddr, saddr); | 
 | 294 |  | 
 | 295 | 	if (saddr) | 
 | 296 | 		memcpy(&transport->saddr, saddr, sizeof(union sctp_addr)); | 
 | 297 | 	else | 
| YOSHIFUJI Hideaki | e511710 | 2008-05-29 19:55:05 +0900 | [diff] [blame] | 298 | 		af->get_saddr(opt, asoc, dst, daddr, &transport->saddr); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 299 |  | 
 | 300 | 	transport->dst = dst; | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 301 | 	if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) { | 
 | 302 | 		return; | 
 | 303 | 	} | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 304 | 	if (dst) { | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 305 | 		transport->pathmtu = dst_mtu(dst); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 306 |  | 
 | 307 | 		/* Initialize sk->sk_rcv_saddr, if the transport is the | 
 | 308 | 		 * association's active path for getsockname(). | 
| YOSHIFUJI Hideaki | d808ad9 | 2007-02-09 23:25:18 +0900 | [diff] [blame] | 309 | 		 */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 310 | 		if (asoc && (transport == asoc->peer.active_path)) | 
| Neil Horman | bf031ff | 2005-12-02 20:32:29 -0800 | [diff] [blame] | 311 | 			opt->pf->af->to_sk_saddr(&transport->saddr, | 
 | 312 | 						 asoc->base.sk); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 313 | 	} else | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 314 | 		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 315 | } | 
 | 316 |  | 
 | 317 | /* Hold a reference to a transport.  */ | 
 | 318 | void sctp_transport_hold(struct sctp_transport *transport) | 
 | 319 | { | 
 | 320 | 	atomic_inc(&transport->refcnt); | 
 | 321 | } | 
 | 322 |  | 
 | 323 | /* Release a reference to a transport and clean up | 
 | 324 |  * if there are no more references. | 
 | 325 |  */ | 
 | 326 | void sctp_transport_put(struct sctp_transport *transport) | 
 | 327 | { | 
 | 328 | 	if (atomic_dec_and_test(&transport->refcnt)) | 
 | 329 | 		sctp_transport_destroy(transport); | 
 | 330 | } | 
 | 331 |  | 
 | 332 | /* Update transport's RTO based on the newly calculated RTT. */ | 
 | 333 | void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) | 
 | 334 | { | 
 | 335 | 	/* Check for valid transport.  */ | 
 | 336 | 	SCTP_ASSERT(tp, "NULL transport", return); | 
 | 337 |  | 
 | 338 | 	/* We should not be doing any RTO updates unless rto_pending is set.  */ | 
 | 339 | 	SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return); | 
 | 340 |  | 
 | 341 | 	if (tp->rttvar || tp->srtt) { | 
 | 342 | 		/* 6.3.1 C3) When a new RTT measurement R' is made, set | 
 | 343 | 		 * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'| | 
 | 344 | 		 * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R' | 
 | 345 | 		 */ | 
 | 346 |  | 
 | 347 | 		/* Note:  The above algorithm has been rewritten to | 
 | 348 | 		 * express rto_beta and rto_alpha as inverse powers | 
 | 349 | 		 * of two. | 
 | 350 | 		 * For example, assuming the default value of RTO.Alpha of | 
 | 351 | 		 * 1/8, rto_alpha would be expressed as 3. | 
 | 352 | 		 */ | 
 | 353 | 		tp->rttvar = tp->rttvar - (tp->rttvar >> sctp_rto_beta) | 
 | 354 | 			+ ((abs(tp->srtt - rtt)) >> sctp_rto_beta); | 
 | 355 | 		tp->srtt = tp->srtt - (tp->srtt >> sctp_rto_alpha) | 
 | 356 | 			+ (rtt >> sctp_rto_alpha); | 
 | 357 | 	} else { | 
 | 358 | 		/* 6.3.1 C2) When the first RTT measurement R is made, set | 
 | 359 | 		 * SRTT <- R, RTTVAR <- R/2. | 
 | 360 | 		 */ | 
 | 361 | 		tp->srtt = rtt; | 
 | 362 | 		tp->rttvar = rtt >> 1; | 
 | 363 | 	} | 
 | 364 |  | 
 | 365 | 	/* 6.3.1 G1) Whenever RTTVAR is computed, if RTTVAR = 0, then | 
 | 366 | 	 * adjust RTTVAR <- G, where G is the CLOCK GRANULARITY. | 
 | 367 | 	 */ | 
 | 368 | 	if (tp->rttvar == 0) | 
 | 369 | 		tp->rttvar = SCTP_CLOCK_GRANULARITY; | 
 | 370 |  | 
 | 371 | 	/* 6.3.1 C3) After the computation, update RTO <- SRTT + 4 * RTTVAR. */ | 
 | 372 | 	tp->rto = tp->srtt + (tp->rttvar << 2); | 
 | 373 |  | 
 | 374 | 	/* 6.3.1 C6) Whenever RTO is computed, if it is less than RTO.Min | 
 | 375 | 	 * seconds then it is rounded up to RTO.Min seconds. | 
 | 376 | 	 */ | 
 | 377 | 	if (tp->rto < tp->asoc->rto_min) | 
 | 378 | 		tp->rto = tp->asoc->rto_min; | 
 | 379 |  | 
 | 380 | 	/* 6.3.1 C7) A maximum value may be placed on RTO provided it is | 
 | 381 | 	 * at least RTO.max seconds. | 
 | 382 | 	 */ | 
 | 383 | 	if (tp->rto > tp->asoc->rto_max) | 
 | 384 | 		tp->rto = tp->asoc->rto_max; | 
 | 385 |  | 
 | 386 | 	tp->rtt = rtt; | 
| Vlad Yasevich | b6157d8 | 2007-10-24 15:59:16 -0400 | [diff] [blame] | 387 | 	tp->last_rto = tp->rto; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 388 |  | 
 | 389 | 	/* Reset rto_pending so that a new RTT measurement is started when a | 
 | 390 | 	 * new data chunk is sent. | 
 | 391 | 	 */ | 
 | 392 | 	tp->rto_pending = 0; | 
 | 393 |  | 
 | 394 | 	SCTP_DEBUG_PRINTK("%s: transport: %p, rtt: %d, srtt: %d " | 
| Harvey Harrison | 0dc4787 | 2008-03-05 20:47:47 -0800 | [diff] [blame] | 395 | 			  "rttvar: %d, rto: %ld\n", __func__, | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 396 | 			  tp, rtt, tp->srtt, tp->rttvar, tp->rto); | 
 | 397 | } | 
 | 398 |  | 
 | 399 | /* This routine updates the transport's cwnd and partial_bytes_acked | 
 | 400 |  * parameters based on the bytes acked in the received SACK. | 
 | 401 |  */ | 
 | 402 | void sctp_transport_raise_cwnd(struct sctp_transport *transport, | 
 | 403 | 			       __u32 sack_ctsn, __u32 bytes_acked) | 
 | 404 | { | 
 | 405 | 	__u32 cwnd, ssthresh, flight_size, pba, pmtu; | 
 | 406 |  | 
 | 407 | 	cwnd = transport->cwnd; | 
 | 408 | 	flight_size = transport->flight_size; | 
 | 409 |  | 
| Vlad Yasevich | a646523 | 2008-06-04 12:38:43 -0700 | [diff] [blame] | 410 | 	/* See if we need to exit Fast Recovery first */ | 
 | 411 | 	if (transport->fast_recovery && | 
 | 412 | 	    TSN_lte(transport->fast_recovery_exit, sack_ctsn)) | 
 | 413 | 		transport->fast_recovery = 0; | 
 | 414 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 415 | 	/* The appropriate cwnd increase algorithm is performed if, and only | 
| Vlad Yasevich | a646523 | 2008-06-04 12:38:43 -0700 | [diff] [blame] | 416 | 	 * if the cumulative TSN whould advanced and the congestion window is | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 417 | 	 * being fully utilized. | 
 | 418 | 	 */ | 
| Vlad Yasevich | a646523 | 2008-06-04 12:38:43 -0700 | [diff] [blame] | 419 | 	if (TSN_lte(sack_ctsn, transport->asoc->ctsn_ack_point) || | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 420 | 	    (flight_size < cwnd)) | 
 | 421 | 		return; | 
 | 422 |  | 
 | 423 | 	ssthresh = transport->ssthresh; | 
 | 424 | 	pba = transport->partial_bytes_acked; | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 425 | 	pmtu = transport->asoc->pathmtu; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 426 |  | 
 | 427 | 	if (cwnd <= ssthresh) { | 
| Vlad Yasevich | a646523 | 2008-06-04 12:38:43 -0700 | [diff] [blame] | 428 | 		/* RFC 4960 7.2.1 | 
 | 429 | 		 * o  When cwnd is less than or equal to ssthresh, an SCTP | 
 | 430 | 		 *    endpoint MUST use the slow-start algorithm to increase | 
 | 431 | 		 *    cwnd only if the current congestion window is being fully | 
 | 432 | 		 *    utilized, an incoming SACK advances the Cumulative TSN | 
 | 433 | 		 *    Ack Point, and the data sender is not in Fast Recovery. | 
 | 434 | 		 *    Only when these three conditions are met can the cwnd be | 
 | 435 | 		 *    increased; otherwise, the cwnd MUST not be increased. | 
 | 436 | 		 *    If these conditions are met, then cwnd MUST be increased | 
 | 437 | 		 *    by, at most, the lesser of 1) the total size of the | 
 | 438 | 		 *    previously outstanding DATA chunk(s) acknowledged, and | 
 | 439 | 		 *    2) the destination's path MTU.  This upper bound protects | 
 | 440 | 		 *    against the ACK-Splitting attack outlined in [SAVAGE99]. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 441 | 		 */ | 
| Vlad Yasevich | a646523 | 2008-06-04 12:38:43 -0700 | [diff] [blame] | 442 | 		if (transport->fast_recovery) | 
 | 443 | 			return; | 
 | 444 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 445 | 		if (bytes_acked > pmtu) | 
 | 446 | 			cwnd += pmtu; | 
 | 447 | 		else | 
 | 448 | 			cwnd += bytes_acked; | 
 | 449 | 		SCTP_DEBUG_PRINTK("%s: SLOW START: transport: %p, " | 
 | 450 | 				  "bytes_acked: %d, cwnd: %d, ssthresh: %d, " | 
 | 451 | 				  "flight_size: %d, pba: %d\n", | 
| Harvey Harrison | 0dc4787 | 2008-03-05 20:47:47 -0800 | [diff] [blame] | 452 | 				  __func__, | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 453 | 				  transport, bytes_acked, cwnd, | 
 | 454 | 				  ssthresh, flight_size, pba); | 
 | 455 | 	} else { | 
 | 456 | 		/* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh, | 
 | 457 | 		 * upon each SACK arrival that advances the Cumulative TSN Ack | 
 | 458 | 		 * Point, increase partial_bytes_acked by the total number of | 
 | 459 | 		 * bytes of all new chunks acknowledged in that SACK including | 
 | 460 | 		 * chunks acknowledged by the new Cumulative TSN Ack and by | 
 | 461 | 		 * Gap Ack Blocks. | 
 | 462 | 		 * | 
 | 463 | 		 * When partial_bytes_acked is equal to or greater than cwnd | 
 | 464 | 		 * and before the arrival of the SACK the sender had cwnd or | 
 | 465 | 		 * more bytes of data outstanding (i.e., before arrival of the | 
 | 466 | 		 * SACK, flightsize was greater than or equal to cwnd), | 
 | 467 | 		 * increase cwnd by MTU, and reset partial_bytes_acked to | 
 | 468 | 		 * (partial_bytes_acked - cwnd). | 
 | 469 | 		 */ | 
 | 470 | 		pba += bytes_acked; | 
 | 471 | 		if (pba >= cwnd) { | 
 | 472 | 			cwnd += pmtu; | 
 | 473 | 			pba = ((cwnd < pba) ? (pba - cwnd) : 0); | 
 | 474 | 		} | 
 | 475 | 		SCTP_DEBUG_PRINTK("%s: CONGESTION AVOIDANCE: " | 
 | 476 | 				  "transport: %p, bytes_acked: %d, cwnd: %d, " | 
 | 477 | 				  "ssthresh: %d, flight_size: %d, pba: %d\n", | 
| Harvey Harrison | 0dc4787 | 2008-03-05 20:47:47 -0800 | [diff] [blame] | 478 | 				  __func__, | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 479 | 				  transport, bytes_acked, cwnd, | 
 | 480 | 				  ssthresh, flight_size, pba); | 
 | 481 | 	} | 
 | 482 |  | 
 | 483 | 	transport->cwnd = cwnd; | 
 | 484 | 	transport->partial_bytes_acked = pba; | 
 | 485 | } | 
 | 486 |  | 
 | 487 | /* This routine is used to lower the transport's cwnd when congestion is | 
 | 488 |  * detected. | 
 | 489 |  */ | 
 | 490 | void sctp_transport_lower_cwnd(struct sctp_transport *transport, | 
 | 491 | 			       sctp_lower_cwnd_t reason) | 
 | 492 | { | 
 | 493 | 	switch (reason) { | 
 | 494 | 	case SCTP_LOWER_CWND_T3_RTX: | 
 | 495 | 		/* RFC 2960 Section 7.2.3, sctpimpguide | 
 | 496 | 		 * When the T3-rtx timer expires on an address, SCTP should | 
 | 497 | 		 * perform slow start by: | 
 | 498 | 		 *      ssthresh = max(cwnd/2, 4*MTU) | 
 | 499 | 		 *      cwnd = 1*MTU | 
 | 500 | 		 *      partial_bytes_acked = 0 | 
 | 501 | 		 */ | 
 | 502 | 		transport->ssthresh = max(transport->cwnd/2, | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 503 | 					  4*transport->asoc->pathmtu); | 
 | 504 | 		transport->cwnd = transport->asoc->pathmtu; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 505 | 		break; | 
 | 506 |  | 
 | 507 | 	case SCTP_LOWER_CWND_FAST_RTX: | 
 | 508 | 		/* RFC 2960 7.2.4 Adjust the ssthresh and cwnd of the | 
 | 509 | 		 * destination address(es) to which the missing DATA chunks | 
 | 510 | 		 * were last sent, according to the formula described in | 
 | 511 | 		 * Section 7.2.3. | 
| YOSHIFUJI Hideaki | d808ad9 | 2007-02-09 23:25:18 +0900 | [diff] [blame] | 512 | 		 * | 
 | 513 | 		 * RFC 2960 7.2.3, sctpimpguide Upon detection of packet | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 514 | 		 * losses from SACK (see Section 7.2.4), An endpoint | 
 | 515 | 		 * should do the following: | 
 | 516 | 		 *      ssthresh = max(cwnd/2, 4*MTU) | 
 | 517 | 		 *      cwnd = ssthresh | 
 | 518 | 		 *      partial_bytes_acked = 0 | 
 | 519 | 		 */ | 
| Vlad Yasevich | a646523 | 2008-06-04 12:38:43 -0700 | [diff] [blame] | 520 | 		if (transport->fast_recovery) | 
 | 521 | 			return; | 
 | 522 |  | 
 | 523 | 		/* Mark Fast recovery */ | 
 | 524 | 		transport->fast_recovery = 1; | 
 | 525 | 		transport->fast_recovery_exit = transport->asoc->next_tsn - 1; | 
 | 526 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 527 | 		transport->ssthresh = max(transport->cwnd/2, | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 528 | 					  4*transport->asoc->pathmtu); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 529 | 		transport->cwnd = transport->ssthresh; | 
 | 530 | 		break; | 
 | 531 |  | 
 | 532 | 	case SCTP_LOWER_CWND_ECNE: | 
 | 533 | 		/* RFC 2481 Section 6.1.2. | 
 | 534 | 		 * If the sender receives an ECN-Echo ACK packet | 
 | 535 | 		 * then the sender knows that congestion was encountered in the | 
 | 536 | 		 * network on the path from the sender to the receiver. The | 
 | 537 | 		 * indication of congestion should be treated just as a | 
 | 538 | 		 * congestion loss in non-ECN Capable TCP. That is, the TCP | 
 | 539 | 		 * source halves the congestion window "cwnd" and reduces the | 
 | 540 | 		 * slow start threshold "ssthresh". | 
 | 541 | 		 * A critical condition is that TCP does not react to | 
 | 542 | 		 * congestion indications more than once every window of | 
 | 543 | 		 * data (or more loosely more than once every round-trip time). | 
 | 544 | 		 */ | 
 | 545 | 		if ((jiffies - transport->last_time_ecne_reduced) > | 
 | 546 | 		    transport->rtt) { | 
 | 547 | 			transport->ssthresh = max(transport->cwnd/2, | 
| YOSHIFUJI Hideaki | d808ad9 | 2007-02-09 23:25:18 +0900 | [diff] [blame] | 548 | 						  4*transport->asoc->pathmtu); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 549 | 			transport->cwnd = transport->ssthresh; | 
 | 550 | 			transport->last_time_ecne_reduced = jiffies; | 
 | 551 | 		} | 
 | 552 | 		break; | 
 | 553 |  | 
 | 554 | 	case SCTP_LOWER_CWND_INACTIVE: | 
 | 555 | 		/* RFC 2960 Section 7.2.1, sctpimpguide | 
 | 556 | 		 * When the endpoint does not transmit data on a given | 
 | 557 | 		 * transport address, the cwnd of the transport address | 
 | 558 | 		 * should be adjusted to max(cwnd/2, 4*MTU) per RTO. | 
 | 559 | 		 * NOTE: Although the draft recommends that this check needs | 
 | 560 | 		 * to be done every RTO interval, we do it every hearbeat | 
 | 561 | 		 * interval. | 
 | 562 | 		 */ | 
 | 563 | 		if ((jiffies - transport->last_time_used) > transport->rto) | 
 | 564 | 			transport->cwnd = max(transport->cwnd/2, | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 565 | 						 4*transport->asoc->pathmtu); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 566 | 		break; | 
| Stephen Hemminger | 3ff50b7 | 2007-04-20 17:09:22 -0700 | [diff] [blame] | 567 | 	} | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 568 |  | 
 | 569 | 	transport->partial_bytes_acked = 0; | 
 | 570 | 	SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: " | 
| Harvey Harrison | 0dc4787 | 2008-03-05 20:47:47 -0800 | [diff] [blame] | 571 | 			  "%d ssthresh: %d\n", __func__, | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 572 | 			  transport, reason, | 
 | 573 | 			  transport->cwnd, transport->ssthresh); | 
 | 574 | } | 
 | 575 |  | 
 | 576 | /* What is the next timeout value for this transport? */ | 
 | 577 | unsigned long sctp_transport_timeout(struct sctp_transport *t) | 
 | 578 | { | 
 | 579 | 	unsigned long timeout; | 
| Sridhar Samudrala | ad8fec1 | 2006-07-21 14:48:50 -0700 | [diff] [blame] | 580 | 	timeout = t->rto + sctp_jitter(t->rto); | 
 | 581 | 	if (t->state != SCTP_UNCONFIRMED) | 
 | 582 | 		timeout += t->hbinterval; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 583 | 	timeout += jiffies; | 
 | 584 | 	return timeout; | 
 | 585 | } | 
| Vlad Yasevich | 749bf92 | 2007-03-19 17:02:30 -0700 | [diff] [blame] | 586 |  | 
 | 587 | /* Reset transport variables to their initial values */ | 
 | 588 | void sctp_transport_reset(struct sctp_transport *t) | 
 | 589 | { | 
 | 590 | 	struct sctp_association *asoc = t->asoc; | 
 | 591 |  | 
 | 592 | 	/* RFC 2960 (bis), Section 5.2.4 | 
 | 593 | 	 * All the congestion control parameters (e.g., cwnd, ssthresh) | 
 | 594 | 	 * related to this peer MUST be reset to their initial values | 
 | 595 | 	 * (see Section 6.2.1) | 
 | 596 | 	 */ | 
 | 597 | 	t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); | 
| Vlad Yasevich | 289f424 | 2007-03-22 12:26:25 -0700 | [diff] [blame] | 598 | 	t->ssthresh = asoc->peer.i.a_rwnd; | 
| Vlad Yasevich | b6157d8 | 2007-10-24 15:59:16 -0400 | [diff] [blame] | 599 | 	t->last_rto = t->rto = asoc->rto_initial; | 
| Vlad Yasevich | 749bf92 | 2007-03-19 17:02:30 -0700 | [diff] [blame] | 600 | 	t->rtt = 0; | 
 | 601 | 	t->srtt = 0; | 
 | 602 | 	t->rttvar = 0; | 
 | 603 |  | 
 | 604 | 	/* Reset these additional varibles so that we have a clean | 
 | 605 | 	 * slate. | 
 | 606 | 	 */ | 
 | 607 | 	t->partial_bytes_acked = 0; | 
 | 608 | 	t->flight_size = 0; | 
 | 609 | 	t->error_count = 0; | 
 | 610 | 	t->rto_pending = 0; | 
| Vlad Yasevich | a646523 | 2008-06-04 12:38:43 -0700 | [diff] [blame] | 611 | 	t->fast_recovery = 0; | 
| Vlad Yasevich | 749bf92 | 2007-03-19 17:02:30 -0700 | [diff] [blame] | 612 |  | 
 | 613 | 	/* Initialize the state information for SFR-CACC */ | 
 | 614 | 	t->cacc.changeover_active = 0; | 
 | 615 | 	t->cacc.cycling_changeover = 0; | 
 | 616 | 	t->cacc.next_tsn_at_change = 0; | 
 | 617 | 	t->cacc.cacc_saw_newack = 0; | 
 | 618 | } |