| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* SCTP kernel reference Implementation | 
 | 2 |  * Copyright (c) 1999-2000 Cisco, Inc. | 
 | 3 |  * Copyright (c) 1999-2001 Motorola, Inc. | 
 | 4 |  * Copyright (c) 2001-2003 International Business Machines Corp. | 
 | 5 |  * Copyright (c) 2001 Intel Corp. | 
 | 6 |  * Copyright (c) 2001 La Monte H.P. Yarroll | 
 | 7 |  * | 
 | 8 |  * This file is part of the SCTP kernel reference Implementation | 
 | 9 |  * | 
 | 10 |  * This module provides the abstraction for an SCTP transport representing | 
 | 11 |  * a remote transport address.  For local transport addresses, we just use | 
 | 12 |  * union sctp_addr. | 
 | 13 |  * | 
 | 14 |  * The SCTP reference implementation is free software; | 
 | 15 |  * you can redistribute it and/or modify it under the terms of | 
 | 16 |  * the GNU General Public License as published by | 
 | 17 |  * the Free Software Foundation; either version 2, or (at your option) | 
 | 18 |  * any later version. | 
 | 19 |  * | 
 | 20 |  * The SCTP reference implementation is distributed in the hope that it | 
 | 21 |  * will be useful, but WITHOUT ANY WARRANTY; without even the implied | 
 | 22 |  *                 ************************ | 
 | 23 |  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | 
 | 24 |  * See the GNU General Public License for more details. | 
 | 25 |  * | 
 | 26 |  * You should have received a copy of the GNU General Public License | 
 | 27 |  * along with GNU CC; see the file COPYING.  If not, write to | 
 | 28 |  * the Free Software Foundation, 59 Temple Place - Suite 330, | 
 | 29 |  * Boston, MA 02111-1307, USA. | 
 | 30 |  * | 
 | 31 |  * Please send any bug reports or fixes you make to the | 
 | 32 |  * email address(es): | 
 | 33 |  *    lksctp developers <lksctp-developers@lists.sourceforge.net> | 
 | 34 |  * | 
 | 35 |  * Or submit a bug report through the following website: | 
 | 36 |  *    http://www.sf.net/projects/lksctp | 
 | 37 |  * | 
 | 38 |  * Written or modified by: | 
 | 39 |  *    La Monte H.P. Yarroll <piggy@acm.org> | 
 | 40 |  *    Karl Knutson          <karl@athena.chicago.il.us> | 
 | 41 |  *    Jon Grimm             <jgrimm@us.ibm.com> | 
 | 42 |  *    Xingang Guo           <xingang.guo@intel.com> | 
 | 43 |  *    Hui Huang             <hui.huang@nokia.com> | 
 | 44 |  *    Sridhar Samudrala	    <sri@us.ibm.com> | 
 | 45 |  *    Ardelle Fan	    <ardelle.fan@intel.com> | 
 | 46 |  * | 
 | 47 |  * Any bugs reported given to us we will try to fix... any fixes shared will | 
 | 48 |  * be incorporated into the next SCTP release. | 
 | 49 |  */ | 
 | 50 |  | 
 | 51 | #include <linux/types.h> | 
| Sridhar Samudrala | ad8fec1 | 2006-07-21 14:48:50 -0700 | [diff] [blame] | 52 | #include <linux/random.h> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 53 | #include <net/sctp/sctp.h> | 
 | 54 | #include <net/sctp/sm.h> | 
 | 55 |  | 
 | 56 | /* 1st Level Abstractions.  */ | 
 | 57 |  | 
 | 58 | /* Initialize a new transport from provided memory.  */ | 
 | 59 | static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, | 
 | 60 | 						  const union sctp_addr *addr, | 
| Al Viro | dd0fc66 | 2005-10-07 07:46:04 +0100 | [diff] [blame] | 61 | 						  gfp_t gfp) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 62 | { | 
 | 63 | 	/* Copy in the address.  */ | 
 | 64 | 	peer->ipaddr = *addr; | 
 | 65 | 	peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); | 
 | 66 | 	peer->asoc = NULL; | 
 | 67 |  | 
 | 68 | 	peer->dst = NULL; | 
 | 69 | 	memset(&peer->saddr, 0, sizeof(union sctp_addr)); | 
 | 70 |  | 
 | 71 | 	/* From 6.3.1 RTO Calculation: | 
 | 72 | 	 * | 
 | 73 | 	 * C1) Until an RTT measurement has been made for a packet sent to the | 
 | 74 | 	 * given destination transport address, set RTO to the protocol | 
 | 75 | 	 * parameter 'RTO.Initial'. | 
 | 76 | 	 */ | 
 | 77 | 	peer->rtt = 0; | 
| Vladislav Yasevich | 3fd091e | 2006-08-22 13:29:17 -0700 | [diff] [blame] | 78 | 	peer->rto = msecs_to_jiffies(sctp_rto_initial); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 79 | 	peer->rttvar = 0; | 
 | 80 | 	peer->srtt = 0; | 
 | 81 | 	peer->rto_pending = 0; | 
 | 82 |  | 
 | 83 | 	peer->last_time_heard = jiffies; | 
 | 84 | 	peer->last_time_used = jiffies; | 
 | 85 | 	peer->last_time_ecne_reduced = jiffies; | 
 | 86 |  | 
| Frank Filz | 3f7a87d | 2005-06-20 13:14:57 -0700 | [diff] [blame] | 87 | 	peer->init_sent_count = 0; | 
 | 88 |  | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 89 | 	peer->param_flags = SPP_HB_DISABLE | | 
 | 90 | 			    SPP_PMTUD_ENABLE | | 
 | 91 | 			    SPP_SACKDELAY_ENABLE; | 
 | 92 | 	peer->hbinterval  = 0; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 93 |  | 
 | 94 | 	/* Initialize the default path max_retrans.  */ | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 95 | 	peer->pathmaxrxt  = sctp_max_retrans_path; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 96 | 	peer->error_count = 0; | 
 | 97 |  | 
 | 98 | 	INIT_LIST_HEAD(&peer->transmitted); | 
 | 99 | 	INIT_LIST_HEAD(&peer->send_ready); | 
 | 100 | 	INIT_LIST_HEAD(&peer->transports); | 
 | 101 |  | 
 | 102 | 	/* Set up the retransmission timer.  */ | 
 | 103 | 	init_timer(&peer->T3_rtx_timer); | 
 | 104 | 	peer->T3_rtx_timer.function = sctp_generate_t3_rtx_event; | 
 | 105 | 	peer->T3_rtx_timer.data = (unsigned long)peer; | 
 | 106 |  | 
 | 107 | 	/* Set up the heartbeat timer. */ | 
 | 108 | 	init_timer(&peer->hb_timer); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 109 | 	peer->hb_timer.function = sctp_generate_heartbeat_event; | 
 | 110 | 	peer->hb_timer.data = (unsigned long)peer; | 
 | 111 |  | 
| Sridhar Samudrala | ad8fec1 | 2006-07-21 14:48:50 -0700 | [diff] [blame] | 112 | 	/* Initialize the 64-bit random nonce sent with heartbeat. */ | 
 | 113 | 	get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); | 
 | 114 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 115 | 	atomic_set(&peer->refcnt, 1); | 
 | 116 | 	peer->dead = 0; | 
 | 117 |  | 
 | 118 | 	peer->malloced = 0; | 
 | 119 |  | 
 | 120 | 	/* Initialize the state information for SFR-CACC */ | 
 | 121 | 	peer->cacc.changeover_active = 0; | 
 | 122 | 	peer->cacc.cycling_changeover = 0; | 
 | 123 | 	peer->cacc.next_tsn_at_change = 0; | 
 | 124 | 	peer->cacc.cacc_saw_newack = 0; | 
 | 125 |  | 
 | 126 | 	return peer; | 
 | 127 | } | 
 | 128 |  | 
 | 129 | /* Allocate and initialize a new transport.  */ | 
| Alexey Dobriyan | 3182cd8 | 2005-07-11 20:57:47 -0700 | [diff] [blame] | 130 | struct sctp_transport *sctp_transport_new(const union sctp_addr *addr, | 
| Al Viro | dd0fc66 | 2005-10-07 07:46:04 +0100 | [diff] [blame] | 131 | 					  gfp_t gfp) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 132 | { | 
| YOSHIFUJI Hideaki | d808ad9 | 2007-02-09 23:25:18 +0900 | [diff] [blame] | 133 | 	struct sctp_transport *transport; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 134 |  | 
| YOSHIFUJI Hideaki | d808ad9 | 2007-02-09 23:25:18 +0900 | [diff] [blame] | 135 | 	transport = t_new(struct sctp_transport, gfp); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 136 | 	if (!transport) | 
 | 137 | 		goto fail; | 
 | 138 |  | 
 | 139 | 	if (!sctp_transport_init(transport, addr, gfp)) | 
 | 140 | 		goto fail_init; | 
 | 141 |  | 
 | 142 | 	transport->malloced = 1; | 
 | 143 | 	SCTP_DBG_OBJCNT_INC(transport); | 
 | 144 |  | 
 | 145 | 	return transport; | 
 | 146 |  | 
 | 147 | fail_init: | 
 | 148 | 	kfree(transport); | 
 | 149 |  | 
 | 150 | fail: | 
 | 151 | 	return NULL; | 
 | 152 | } | 
 | 153 |  | 
 | 154 | /* This transport is no longer needed.  Free up if possible, or | 
 | 155 |  * delay until it last reference count. | 
 | 156 |  */ | 
 | 157 | void sctp_transport_free(struct sctp_transport *transport) | 
 | 158 | { | 
 | 159 | 	transport->dead = 1; | 
 | 160 |  | 
 | 161 | 	/* Try to delete the heartbeat timer.  */ | 
 | 162 | 	if (del_timer(&transport->hb_timer)) | 
 | 163 | 		sctp_transport_put(transport); | 
 | 164 |  | 
 | 165 | 	/* Delete the T3_rtx timer if it's active. | 
 | 166 | 	 * There is no point in not doing this now and letting | 
 | 167 | 	 * structure hang around in memory since we know | 
 | 168 | 	 * the tranport is going away. | 
 | 169 | 	 */ | 
 | 170 | 	if (timer_pending(&transport->T3_rtx_timer) && | 
 | 171 | 	    del_timer(&transport->T3_rtx_timer)) | 
 | 172 | 		sctp_transport_put(transport); | 
 | 173 |  | 
 | 174 |  | 
 | 175 | 	sctp_transport_put(transport); | 
 | 176 | } | 
 | 177 |  | 
 | 178 | /* Destroy the transport data structure. | 
 | 179 |  * Assumes there are no more users of this structure. | 
 | 180 |  */ | 
 | 181 | static void sctp_transport_destroy(struct sctp_transport *transport) | 
 | 182 | { | 
 | 183 | 	SCTP_ASSERT(transport->dead, "Transport is not dead", return); | 
 | 184 |  | 
 | 185 | 	if (transport->asoc) | 
 | 186 | 		sctp_association_put(transport->asoc); | 
 | 187 |  | 
| YOSHIFUJI Hideaki | d808ad9 | 2007-02-09 23:25:18 +0900 | [diff] [blame] | 188 | 	sctp_packet_free(&transport->packet); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 189 |  | 
 | 190 | 	dst_release(transport->dst); | 
 | 191 | 	kfree(transport); | 
 | 192 | 	SCTP_DBG_OBJCNT_DEC(transport); | 
 | 193 | } | 
 | 194 |  | 
 | 195 | /* Start T3_rtx timer if it is not already running and update the heartbeat | 
 | 196 |  * timer.  This routine is called every time a DATA chunk is sent. | 
 | 197 |  */ | 
 | 198 | void sctp_transport_reset_timers(struct sctp_transport *transport) | 
 | 199 | { | 
 | 200 | 	/* RFC 2960 6.3.2 Retransmission Timer Rules | 
 | 201 | 	 * | 
 | 202 | 	 * R1) Every time a DATA chunk is sent to any address(including a | 
 | 203 | 	 * retransmission), if the T3-rtx timer of that address is not running | 
 | 204 | 	 * start it running so that it will expire after the RTO of that | 
 | 205 | 	 * address. | 
 | 206 | 	 */ | 
 | 207 |  | 
 | 208 | 	if (!timer_pending(&transport->T3_rtx_timer)) | 
 | 209 | 		if (!mod_timer(&transport->T3_rtx_timer, | 
 | 210 | 			       jiffies + transport->rto)) | 
 | 211 | 			sctp_transport_hold(transport); | 
 | 212 |  | 
 | 213 | 	/* When a data chunk is sent, reset the heartbeat interval.  */ | 
 | 214 | 	if (!mod_timer(&transport->hb_timer, | 
 | 215 | 		       sctp_transport_timeout(transport))) | 
 | 216 | 	    sctp_transport_hold(transport); | 
 | 217 | } | 
 | 218 |  | 
 | 219 | /* This transport has been assigned to an association. | 
 | 220 |  * Initialize fields from the association or from the sock itself. | 
 | 221 |  * Register the reference count in the association. | 
 | 222 |  */ | 
 | 223 | void sctp_transport_set_owner(struct sctp_transport *transport, | 
 | 224 | 			      struct sctp_association *asoc) | 
 | 225 | { | 
 | 226 | 	transport->asoc = asoc; | 
 | 227 | 	sctp_association_hold(asoc); | 
 | 228 | } | 
 | 229 |  | 
 | 230 | /* Initialize the pmtu of a transport. */ | 
 | 231 | void sctp_transport_pmtu(struct sctp_transport *transport) | 
 | 232 | { | 
 | 233 | 	struct dst_entry *dst; | 
 | 234 |  | 
 | 235 | 	dst = transport->af_specific->get_dst(NULL, &transport->ipaddr, NULL); | 
 | 236 |  | 
 | 237 | 	if (dst) { | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 238 | 		transport->pathmtu = dst_mtu(dst); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 239 | 		dst_release(dst); | 
 | 240 | 	} else | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 241 | 		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 242 | } | 
 | 243 |  | 
| Vlad Yasevich | c910b47 | 2007-06-07 13:47:03 -0400 | [diff] [blame] | 244 | /* this is a complete rip-off from __sk_dst_check | 
 | 245 |  * the cookie is always 0 since this is how it's used in the | 
 | 246 |  * pmtu code | 
 | 247 |  */ | 
 | 248 | static struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t) | 
 | 249 | { | 
 | 250 | 	struct dst_entry *dst = t->dst; | 
 | 251 |  | 
 | 252 | 	if (dst && dst->obsolete && dst->ops->check(dst, 0) == NULL) { | 
 | 253 | 		dst_release(t->dst); | 
 | 254 | 		t->dst = NULL; | 
 | 255 | 		return NULL; | 
 | 256 | 	} | 
 | 257 |  | 
 | 258 | 	return dst; | 
 | 259 | } | 
 | 260 |  | 
 | 261 | void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) | 
 | 262 | { | 
 | 263 | 	struct dst_entry *dst; | 
 | 264 |  | 
 | 265 | 	if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { | 
 | 266 | 		printk(KERN_WARNING "%s: Reported pmtu %d too low, " | 
 | 267 | 		       "using default minimum of %d\n", | 
 | 268 | 		       __FUNCTION__, pmtu, | 
 | 269 | 		       SCTP_DEFAULT_MINSEGMENT); | 
 | 270 | 		/* Use default minimum segment size and disable | 
 | 271 | 		 * pmtu discovery on this transport. | 
 | 272 | 		 */ | 
 | 273 | 		t->pathmtu = SCTP_DEFAULT_MINSEGMENT; | 
| Vlad Yasevich | c910b47 | 2007-06-07 13:47:03 -0400 | [diff] [blame] | 274 | 	} else { | 
 | 275 | 		t->pathmtu = pmtu; | 
 | 276 | 	} | 
 | 277 |  | 
 | 278 | 	dst = sctp_transport_dst_check(t); | 
 | 279 | 	if (dst) | 
 | 280 | 		dst->ops->update_pmtu(dst, pmtu); | 
 | 281 | } | 
 | 282 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 283 | /* Caches the dst entry and source address for a transport's destination | 
 | 284 |  * address. | 
 | 285 |  */ | 
 | 286 | void sctp_transport_route(struct sctp_transport *transport, | 
 | 287 | 			  union sctp_addr *saddr, struct sctp_sock *opt) | 
 | 288 | { | 
 | 289 | 	struct sctp_association *asoc = transport->asoc; | 
 | 290 | 	struct sctp_af *af = transport->af_specific; | 
 | 291 | 	union sctp_addr *daddr = &transport->ipaddr; | 
 | 292 | 	struct dst_entry *dst; | 
 | 293 |  | 
 | 294 | 	dst = af->get_dst(asoc, daddr, saddr); | 
 | 295 |  | 
 | 296 | 	if (saddr) | 
 | 297 | 		memcpy(&transport->saddr, saddr, sizeof(union sctp_addr)); | 
 | 298 | 	else | 
 | 299 | 		af->get_saddr(asoc, dst, daddr, &transport->saddr); | 
 | 300 |  | 
 | 301 | 	transport->dst = dst; | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 302 | 	if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) { | 
 | 303 | 		return; | 
 | 304 | 	} | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 305 | 	if (dst) { | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 306 | 		transport->pathmtu = dst_mtu(dst); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 307 |  | 
 | 308 | 		/* Initialize sk->sk_rcv_saddr, if the transport is the | 
 | 309 | 		 * association's active path for getsockname(). | 
| YOSHIFUJI Hideaki | d808ad9 | 2007-02-09 23:25:18 +0900 | [diff] [blame] | 310 | 		 */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 311 | 		if (asoc && (transport == asoc->peer.active_path)) | 
| Neil Horman | bf031ff | 2005-12-02 20:32:29 -0800 | [diff] [blame] | 312 | 			opt->pf->af->to_sk_saddr(&transport->saddr, | 
 | 313 | 						 asoc->base.sk); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 314 | 	} else | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 315 | 		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 316 | } | 
 | 317 |  | 
 | 318 | /* Hold a reference to a transport.  */ | 
 | 319 | void sctp_transport_hold(struct sctp_transport *transport) | 
 | 320 | { | 
 | 321 | 	atomic_inc(&transport->refcnt); | 
 | 322 | } | 
 | 323 |  | 
 | 324 | /* Release a reference to a transport and clean up | 
 | 325 |  * if there are no more references. | 
 | 326 |  */ | 
 | 327 | void sctp_transport_put(struct sctp_transport *transport) | 
 | 328 | { | 
 | 329 | 	if (atomic_dec_and_test(&transport->refcnt)) | 
 | 330 | 		sctp_transport_destroy(transport); | 
 | 331 | } | 
 | 332 |  | 
 | 333 | /* Update transport's RTO based on the newly calculated RTT. */ | 
 | 334 | void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) | 
 | 335 | { | 
 | 336 | 	/* Check for valid transport.  */ | 
 | 337 | 	SCTP_ASSERT(tp, "NULL transport", return); | 
 | 338 |  | 
 | 339 | 	/* We should not be doing any RTO updates unless rto_pending is set.  */ | 
 | 340 | 	SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return); | 
 | 341 |  | 
 | 342 | 	if (tp->rttvar || tp->srtt) { | 
 | 343 | 		/* 6.3.1 C3) When a new RTT measurement R' is made, set | 
 | 344 | 		 * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'| | 
 | 345 | 		 * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R' | 
 | 346 | 		 */ | 
 | 347 |  | 
 | 348 | 		/* Note:  The above algorithm has been rewritten to | 
 | 349 | 		 * express rto_beta and rto_alpha as inverse powers | 
 | 350 | 		 * of two. | 
 | 351 | 		 * For example, assuming the default value of RTO.Alpha of | 
 | 352 | 		 * 1/8, rto_alpha would be expressed as 3. | 
 | 353 | 		 */ | 
 | 354 | 		tp->rttvar = tp->rttvar - (tp->rttvar >> sctp_rto_beta) | 
 | 355 | 			+ ((abs(tp->srtt - rtt)) >> sctp_rto_beta); | 
 | 356 | 		tp->srtt = tp->srtt - (tp->srtt >> sctp_rto_alpha) | 
 | 357 | 			+ (rtt >> sctp_rto_alpha); | 
 | 358 | 	} else { | 
 | 359 | 		/* 6.3.1 C2) When the first RTT measurement R is made, set | 
 | 360 | 		 * SRTT <- R, RTTVAR <- R/2. | 
 | 361 | 		 */ | 
 | 362 | 		tp->srtt = rtt; | 
 | 363 | 		tp->rttvar = rtt >> 1; | 
 | 364 | 	} | 
 | 365 |  | 
 | 366 | 	/* 6.3.1 G1) Whenever RTTVAR is computed, if RTTVAR = 0, then | 
 | 367 | 	 * adjust RTTVAR <- G, where G is the CLOCK GRANULARITY. | 
 | 368 | 	 */ | 
 | 369 | 	if (tp->rttvar == 0) | 
 | 370 | 		tp->rttvar = SCTP_CLOCK_GRANULARITY; | 
 | 371 |  | 
 | 372 | 	/* 6.3.1 C3) After the computation, update RTO <- SRTT + 4 * RTTVAR. */ | 
 | 373 | 	tp->rto = tp->srtt + (tp->rttvar << 2); | 
 | 374 |  | 
 | 375 | 	/* 6.3.1 C6) Whenever RTO is computed, if it is less than RTO.Min | 
 | 376 | 	 * seconds then it is rounded up to RTO.Min seconds. | 
 | 377 | 	 */ | 
 | 378 | 	if (tp->rto < tp->asoc->rto_min) | 
 | 379 | 		tp->rto = tp->asoc->rto_min; | 
 | 380 |  | 
 | 381 | 	/* 6.3.1 C7) A maximum value may be placed on RTO provided it is | 
 | 382 | 	 * at least RTO.max seconds. | 
 | 383 | 	 */ | 
 | 384 | 	if (tp->rto > tp->asoc->rto_max) | 
 | 385 | 		tp->rto = tp->asoc->rto_max; | 
 | 386 |  | 
 | 387 | 	tp->rtt = rtt; | 
 | 388 |  | 
 | 389 | 	/* Reset rto_pending so that a new RTT measurement is started when a | 
 | 390 | 	 * new data chunk is sent. | 
 | 391 | 	 */ | 
 | 392 | 	tp->rto_pending = 0; | 
 | 393 |  | 
 | 394 | 	SCTP_DEBUG_PRINTK("%s: transport: %p, rtt: %d, srtt: %d " | 
| Vlad Yasevich | 8116ffa | 2006-01-17 11:55:17 -0800 | [diff] [blame] | 395 | 			  "rttvar: %d, rto: %ld\n", __FUNCTION__, | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 396 | 			  tp, rtt, tp->srtt, tp->rttvar, tp->rto); | 
 | 397 | } | 
 | 398 |  | 
 | 399 | /* This routine updates the transport's cwnd and partial_bytes_acked | 
 | 400 |  * parameters based on the bytes acked in the received SACK. | 
 | 401 |  */ | 
 | 402 | void sctp_transport_raise_cwnd(struct sctp_transport *transport, | 
 | 403 | 			       __u32 sack_ctsn, __u32 bytes_acked) | 
 | 404 | { | 
 | 405 | 	__u32 cwnd, ssthresh, flight_size, pba, pmtu; | 
 | 406 |  | 
 | 407 | 	cwnd = transport->cwnd; | 
 | 408 | 	flight_size = transport->flight_size; | 
 | 409 |  | 
 | 410 | 	/* The appropriate cwnd increase algorithm is performed if, and only | 
 | 411 | 	 * if the cumulative TSN has advanced and the congestion window is | 
 | 412 | 	 * being fully utilized. | 
 | 413 | 	 */ | 
 | 414 | 	if ((transport->asoc->ctsn_ack_point >= sack_ctsn) || | 
 | 415 | 	    (flight_size < cwnd)) | 
 | 416 | 		return; | 
 | 417 |  | 
 | 418 | 	ssthresh = transport->ssthresh; | 
 | 419 | 	pba = transport->partial_bytes_acked; | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 420 | 	pmtu = transport->asoc->pathmtu; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 421 |  | 
 | 422 | 	if (cwnd <= ssthresh) { | 
 | 423 | 		/* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less | 
 | 424 | 		 * than or equal to ssthresh an SCTP endpoint MUST use the | 
 | 425 | 		 * slow start algorithm to increase cwnd only if the current | 
 | 426 | 		 * congestion window is being fully utilized and an incoming | 
 | 427 | 		 * SACK advances the Cumulative TSN Ack Point. Only when these | 
 | 428 | 		 * two conditions are met can the cwnd be increased otherwise | 
 | 429 | 		 * the cwnd MUST not be increased. If these conditions are met | 
 | 430 | 		 * then cwnd MUST be increased by at most the lesser of | 
 | 431 | 		 * 1) the total size of the previously outstanding DATA | 
 | 432 | 		 * chunk(s) acknowledged, and 2) the destination's path MTU. | 
 | 433 | 		 */ | 
 | 434 | 		if (bytes_acked > pmtu) | 
 | 435 | 			cwnd += pmtu; | 
 | 436 | 		else | 
 | 437 | 			cwnd += bytes_acked; | 
 | 438 | 		SCTP_DEBUG_PRINTK("%s: SLOW START: transport: %p, " | 
 | 439 | 				  "bytes_acked: %d, cwnd: %d, ssthresh: %d, " | 
 | 440 | 				  "flight_size: %d, pba: %d\n", | 
 | 441 | 				  __FUNCTION__, | 
 | 442 | 				  transport, bytes_acked, cwnd, | 
 | 443 | 				  ssthresh, flight_size, pba); | 
 | 444 | 	} else { | 
 | 445 | 		/* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh, | 
 | 446 | 		 * upon each SACK arrival that advances the Cumulative TSN Ack | 
 | 447 | 		 * Point, increase partial_bytes_acked by the total number of | 
 | 448 | 		 * bytes of all new chunks acknowledged in that SACK including | 
 | 449 | 		 * chunks acknowledged by the new Cumulative TSN Ack and by | 
 | 450 | 		 * Gap Ack Blocks. | 
 | 451 | 		 * | 
 | 452 | 		 * When partial_bytes_acked is equal to or greater than cwnd | 
 | 453 | 		 * and before the arrival of the SACK the sender had cwnd or | 
 | 454 | 		 * more bytes of data outstanding (i.e., before arrival of the | 
 | 455 | 		 * SACK, flightsize was greater than or equal to cwnd), | 
 | 456 | 		 * increase cwnd by MTU, and reset partial_bytes_acked to | 
 | 457 | 		 * (partial_bytes_acked - cwnd). | 
 | 458 | 		 */ | 
 | 459 | 		pba += bytes_acked; | 
 | 460 | 		if (pba >= cwnd) { | 
 | 461 | 			cwnd += pmtu; | 
 | 462 | 			pba = ((cwnd < pba) ? (pba - cwnd) : 0); | 
 | 463 | 		} | 
 | 464 | 		SCTP_DEBUG_PRINTK("%s: CONGESTION AVOIDANCE: " | 
 | 465 | 				  "transport: %p, bytes_acked: %d, cwnd: %d, " | 
 | 466 | 				  "ssthresh: %d, flight_size: %d, pba: %d\n", | 
 | 467 | 				  __FUNCTION__, | 
 | 468 | 				  transport, bytes_acked, cwnd, | 
 | 469 | 				  ssthresh, flight_size, pba); | 
 | 470 | 	} | 
 | 471 |  | 
 | 472 | 	transport->cwnd = cwnd; | 
 | 473 | 	transport->partial_bytes_acked = pba; | 
 | 474 | } | 
 | 475 |  | 
 | 476 | /* This routine is used to lower the transport's cwnd when congestion is | 
 | 477 |  * detected. | 
 | 478 |  */ | 
 | 479 | void sctp_transport_lower_cwnd(struct sctp_transport *transport, | 
 | 480 | 			       sctp_lower_cwnd_t reason) | 
 | 481 | { | 
 | 482 | 	switch (reason) { | 
 | 483 | 	case SCTP_LOWER_CWND_T3_RTX: | 
 | 484 | 		/* RFC 2960 Section 7.2.3, sctpimpguide | 
 | 485 | 		 * When the T3-rtx timer expires on an address, SCTP should | 
 | 486 | 		 * perform slow start by: | 
 | 487 | 		 *      ssthresh = max(cwnd/2, 4*MTU) | 
 | 488 | 		 *      cwnd = 1*MTU | 
 | 489 | 		 *      partial_bytes_acked = 0 | 
 | 490 | 		 */ | 
 | 491 | 		transport->ssthresh = max(transport->cwnd/2, | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 492 | 					  4*transport->asoc->pathmtu); | 
 | 493 | 		transport->cwnd = transport->asoc->pathmtu; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 494 | 		break; | 
 | 495 |  | 
 | 496 | 	case SCTP_LOWER_CWND_FAST_RTX: | 
 | 497 | 		/* RFC 2960 7.2.4 Adjust the ssthresh and cwnd of the | 
 | 498 | 		 * destination address(es) to which the missing DATA chunks | 
 | 499 | 		 * were last sent, according to the formula described in | 
 | 500 | 		 * Section 7.2.3. | 
| YOSHIFUJI Hideaki | d808ad9 | 2007-02-09 23:25:18 +0900 | [diff] [blame] | 501 | 		 * | 
 | 502 | 		 * RFC 2960 7.2.3, sctpimpguide Upon detection of packet | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 503 | 		 * losses from SACK (see Section 7.2.4), An endpoint | 
 | 504 | 		 * should do the following: | 
 | 505 | 		 *      ssthresh = max(cwnd/2, 4*MTU) | 
 | 506 | 		 *      cwnd = ssthresh | 
 | 507 | 		 *      partial_bytes_acked = 0 | 
 | 508 | 		 */ | 
 | 509 | 		transport->ssthresh = max(transport->cwnd/2, | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 510 | 					  4*transport->asoc->pathmtu); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 511 | 		transport->cwnd = transport->ssthresh; | 
 | 512 | 		break; | 
 | 513 |  | 
 | 514 | 	case SCTP_LOWER_CWND_ECNE: | 
 | 515 | 		/* RFC 2481 Section 6.1.2. | 
 | 516 | 		 * If the sender receives an ECN-Echo ACK packet | 
 | 517 | 		 * then the sender knows that congestion was encountered in the | 
 | 518 | 		 * network on the path from the sender to the receiver. The | 
 | 519 | 		 * indication of congestion should be treated just as a | 
 | 520 | 		 * congestion loss in non-ECN Capable TCP. That is, the TCP | 
 | 521 | 		 * source halves the congestion window "cwnd" and reduces the | 
 | 522 | 		 * slow start threshold "ssthresh". | 
 | 523 | 		 * A critical condition is that TCP does not react to | 
 | 524 | 		 * congestion indications more than once every window of | 
 | 525 | 		 * data (or more loosely more than once every round-trip time). | 
 | 526 | 		 */ | 
 | 527 | 		if ((jiffies - transport->last_time_ecne_reduced) > | 
 | 528 | 		    transport->rtt) { | 
 | 529 | 			transport->ssthresh = max(transport->cwnd/2, | 
| YOSHIFUJI Hideaki | d808ad9 | 2007-02-09 23:25:18 +0900 | [diff] [blame] | 530 | 						  4*transport->asoc->pathmtu); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 531 | 			transport->cwnd = transport->ssthresh; | 
 | 532 | 			transport->last_time_ecne_reduced = jiffies; | 
 | 533 | 		} | 
 | 534 | 		break; | 
 | 535 |  | 
 | 536 | 	case SCTP_LOWER_CWND_INACTIVE: | 
 | 537 | 		/* RFC 2960 Section 7.2.1, sctpimpguide | 
 | 538 | 		 * When the endpoint does not transmit data on a given | 
 | 539 | 		 * transport address, the cwnd of the transport address | 
 | 540 | 		 * should be adjusted to max(cwnd/2, 4*MTU) per RTO. | 
 | 541 | 		 * NOTE: Although the draft recommends that this check needs | 
 | 542 | 		 * to be done every RTO interval, we do it every hearbeat | 
 | 543 | 		 * interval. | 
 | 544 | 		 */ | 
 | 545 | 		if ((jiffies - transport->last_time_used) > transport->rto) | 
 | 546 | 			transport->cwnd = max(transport->cwnd/2, | 
| Frank Filz | 52ccb8e | 2005-12-22 11:36:46 -0800 | [diff] [blame] | 547 | 						 4*transport->asoc->pathmtu); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 548 | 		break; | 
| Stephen Hemminger | 3ff50b7 | 2007-04-20 17:09:22 -0700 | [diff] [blame] | 549 | 	} | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 550 |  | 
 | 551 | 	transport->partial_bytes_acked = 0; | 
 | 552 | 	SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: " | 
 | 553 | 			  "%d ssthresh: %d\n", __FUNCTION__, | 
 | 554 | 			  transport, reason, | 
 | 555 | 			  transport->cwnd, transport->ssthresh); | 
 | 556 | } | 
 | 557 |  | 
 | 558 | /* What is the next timeout value for this transport? */ | 
 | 559 | unsigned long sctp_transport_timeout(struct sctp_transport *t) | 
 | 560 | { | 
 | 561 | 	unsigned long timeout; | 
| Sridhar Samudrala | ad8fec1 | 2006-07-21 14:48:50 -0700 | [diff] [blame] | 562 | 	timeout = t->rto + sctp_jitter(t->rto); | 
 | 563 | 	if (t->state != SCTP_UNCONFIRMED) | 
 | 564 | 		timeout += t->hbinterval; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 565 | 	timeout += jiffies; | 
 | 566 | 	return timeout; | 
 | 567 | } | 
| Vlad Yasevich | 749bf92 | 2007-03-19 17:02:30 -0700 | [diff] [blame] | 568 |  | 
 | 569 | /* Reset transport variables to their initial values */ | 
 | 570 | void sctp_transport_reset(struct sctp_transport *t) | 
 | 571 | { | 
 | 572 | 	struct sctp_association *asoc = t->asoc; | 
 | 573 |  | 
 | 574 | 	/* RFC 2960 (bis), Section 5.2.4 | 
 | 575 | 	 * All the congestion control parameters (e.g., cwnd, ssthresh) | 
 | 576 | 	 * related to this peer MUST be reset to their initial values | 
 | 577 | 	 * (see Section 6.2.1) | 
 | 578 | 	 */ | 
 | 579 | 	t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); | 
| Vlad Yasevich | 289f424 | 2007-03-22 12:26:25 -0700 | [diff] [blame] | 580 | 	t->ssthresh = asoc->peer.i.a_rwnd; | 
| Vlad Yasevich | 749bf92 | 2007-03-19 17:02:30 -0700 | [diff] [blame] | 581 | 	t->rto = asoc->rto_initial; | 
 | 582 | 	t->rtt = 0; | 
 | 583 | 	t->srtt = 0; | 
 | 584 | 	t->rttvar = 0; | 
 | 585 |  | 
 | 586 | 	/* Reset these additional varibles so that we have a clean | 
 | 587 | 	 * slate. | 
 | 588 | 	 */ | 
 | 589 | 	t->partial_bytes_acked = 0; | 
 | 590 | 	t->flight_size = 0; | 
 | 591 | 	t->error_count = 0; | 
 | 592 | 	t->rto_pending = 0; | 
 | 593 |  | 
 | 594 | 	/* Initialize the state information for SFR-CACC */ | 
 | 595 | 	t->cacc.changeover_active = 0; | 
 | 596 | 	t->cacc.cycling_changeover = 0; | 
 | 597 | 	t->cacc.next_tsn_at_change = 0; | 
 | 598 | 	t->cacc.cacc_saw_newack = 0; | 
 | 599 | } |