| Wong Hoi Sing Edison | 7c106d7 | 2006-06-05 17:27:58 -0700 | [diff] [blame] | 1 | /* | 
 | 2 |  * TCP Low Priority (TCP-LP) | 
 | 3 |  * | 
 | 4 |  * TCP Low Priority is a distributed algorithm whose goal is to utilize only | 
 | 5 |  *   the excess network bandwidth as compared to the ``fair share`` of | 
| Wong Hoi Sing Edison | 3795da4 | 2006-09-13 20:30:30 -0700 | [diff] [blame] | 6 |  *   bandwidth as targeted by TCP. | 
| Wong Hoi Sing Edison | 7c106d7 | 2006-06-05 17:27:58 -0700 | [diff] [blame] | 7 |  * | 
| Wong Hoi Sing Edison | 7c106d7 | 2006-06-05 17:27:58 -0700 | [diff] [blame] | 8 |  * As of 2.6.13, Linux supports pluggable congestion control algorithms. | 
 | 9 |  * Due to the limitation of the API, we take the following changes from | 
 | 10 |  * the original TCP-LP implementation: | 
 | 11 |  *   o We use newReno in most core CA handling. Only add some checking | 
 | 12 |  *     within cong_avoid. | 
 | 13 |  *   o Error correcting in remote HZ, therefore remote HZ will be keeped | 
 | 14 |  *     on checking and updating. | 
 | 15 |  *   o Handling calculation of One-Way-Delay (OWD) within rtt_sample, sicne | 
 | 16 |  *     OWD have a similar meaning as RTT. Also correct the buggy formular. | 
 | 17 |  *   o Handle reaction for Early Congestion Indication (ECI) within | 
 | 18 |  *     pkts_acked, as mentioned within pseudo code. | 
 | 19 |  *   o OWD is handled in relative format, where local time stamp will in | 
 | 20 |  *     tcp_time_stamp format. | 
 | 21 |  * | 
| Wong Hoi Sing Edison | 3795da4 | 2006-09-13 20:30:30 -0700 | [diff] [blame] | 22 |  * Original Author: | 
 | 23 |  *   Aleksandar Kuzmanovic <akuzma@northwestern.edu> | 
 | 24 |  * Available from: | 
 | 25 |  *   http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf | 
 | 26 |  * Original implementation for 2.4.19: | 
 | 27 |  *   http://www-ece.rice.edu/networks/TCP-LP/ | 
| Wong Hoi Sing Edison | 7c106d7 | 2006-06-05 17:27:58 -0700 | [diff] [blame] | 28 |  * | 
| Wong Hoi Sing Edison | 3795da4 | 2006-09-13 20:30:30 -0700 | [diff] [blame] | 29 |  * 2.6.x module Authors: | 
 | 30 |  *   Wong Hoi Sing, Edison <hswong3i@gmail.com> | 
 | 31 |  *   Hung Hing Lun, Mike <hlhung3i@gmail.com> | 
 | 32 |  * SourceForge project page: | 
 | 33 |  *   http://tcp-lp-mod.sourceforge.net/ | 
| Wong Hoi Sing Edison | 7c106d7 | 2006-06-05 17:27:58 -0700 | [diff] [blame] | 34 |  */ | 
 | 35 |  | 
| Wong Hoi Sing Edison | 7c106d7 | 2006-06-05 17:27:58 -0700 | [diff] [blame] | 36 | #include <linux/module.h> | 
 | 37 | #include <net/tcp.h> | 
 | 38 |  | 
 | 39 | /* resolution of owd */ | 
 | 40 | #define LP_RESOL       1000 | 
 | 41 |  | 
 | 42 | /** | 
 | 43 |  * enum tcp_lp_state | 
 | 44 |  * @LP_VALID_RHZ: is remote HZ valid? | 
 | 45 |  * @LP_VALID_OWD: is OWD valid? | 
 | 46 |  * @LP_WITHIN_THR: are we within threshold? | 
 | 47 |  * @LP_WITHIN_INF: are we within inference? | 
 | 48 |  * | 
 | 49 |  * TCP-LP's state flags. | 
 | 50 |  * We create this set of state flag mainly for debugging. | 
 | 51 |  */ | 
 | 52 | enum tcp_lp_state { | 
 | 53 | 	LP_VALID_RHZ = (1 << 0), | 
 | 54 | 	LP_VALID_OWD = (1 << 1), | 
 | 55 | 	LP_WITHIN_THR = (1 << 3), | 
 | 56 | 	LP_WITHIN_INF = (1 << 4), | 
 | 57 | }; | 
 | 58 |  | 
 | 59 | /** | 
 | 60 |  * struct lp | 
 | 61 |  * @flag: TCP-LP state flag | 
 | 62 |  * @sowd: smoothed OWD << 3 | 
 | 63 |  * @owd_min: min OWD | 
 | 64 |  * @owd_max: max OWD | 
 | 65 |  * @owd_max_rsv: resrved max owd | 
 | 66 |  * @remote_hz: estimated remote HZ | 
 | 67 |  * @remote_ref_time: remote reference time | 
 | 68 |  * @local_ref_time: local reference time | 
 | 69 |  * @last_drop: time for last active drop | 
 | 70 |  * @inference: current inference | 
 | 71 |  * | 
 | 72 |  * TCP-LP's private struct. | 
 | 73 |  * We get the idea from original TCP-LP implementation where only left those we | 
 | 74 |  * found are really useful. | 
 | 75 |  */ | 
 | 76 | struct lp { | 
 | 77 | 	u32 flag; | 
 | 78 | 	u32 sowd; | 
 | 79 | 	u32 owd_min; | 
 | 80 | 	u32 owd_max; | 
 | 81 | 	u32 owd_max_rsv; | 
 | 82 | 	u32 remote_hz; | 
 | 83 | 	u32 remote_ref_time; | 
 | 84 | 	u32 local_ref_time; | 
 | 85 | 	u32 last_drop; | 
 | 86 | 	u32 inference; | 
 | 87 | }; | 
 | 88 |  | 
 | 89 | /** | 
 | 90 |  * tcp_lp_init | 
 | 91 |  * | 
 | 92 |  * Init all required variables. | 
 | 93 |  * Clone the handling from Vegas module implementation. | 
 | 94 |  */ | 
 | 95 | static void tcp_lp_init(struct sock *sk) | 
 | 96 | { | 
 | 97 | 	struct lp *lp = inet_csk_ca(sk); | 
 | 98 |  | 
 | 99 | 	lp->flag = 0; | 
 | 100 | 	lp->sowd = 0; | 
 | 101 | 	lp->owd_min = 0xffffffff; | 
 | 102 | 	lp->owd_max = 0; | 
 | 103 | 	lp->owd_max_rsv = 0; | 
 | 104 | 	lp->remote_hz = 0; | 
 | 105 | 	lp->remote_ref_time = 0; | 
 | 106 | 	lp->local_ref_time = 0; | 
 | 107 | 	lp->last_drop = 0; | 
 | 108 | 	lp->inference = 0; | 
 | 109 | } | 
 | 110 |  | 
 | 111 | /** | 
 | 112 |  * tcp_lp_cong_avoid | 
 | 113 |  * | 
 | 114 |  * Implementation of cong_avoid. | 
 | 115 |  * Will only call newReno CA when away from inference. | 
 | 116 |  * From TCP-LP's paper, this will be handled in additive increasement. | 
 | 117 |  */ | 
 | 118 | static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, | 
 | 119 | 			      int flag) | 
 | 120 | { | 
 | 121 | 	struct lp *lp = inet_csk_ca(sk); | 
 | 122 |  | 
 | 123 | 	if (!(lp->flag & LP_WITHIN_INF)) | 
 | 124 | 		tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); | 
 | 125 | } | 
 | 126 |  | 
 | 127 | /** | 
 | 128 |  * tcp_lp_remote_hz_estimator | 
 | 129 |  * | 
 | 130 |  * Estimate remote HZ. | 
 | 131 |  * We keep on updating the estimated value, where original TCP-LP | 
 | 132 |  * implementation only guest it for once and use forever. | 
 | 133 |  */ | 
 | 134 | static u32 tcp_lp_remote_hz_estimator(struct sock *sk) | 
 | 135 | { | 
 | 136 | 	struct tcp_sock *tp = tcp_sk(sk); | 
 | 137 | 	struct lp *lp = inet_csk_ca(sk); | 
 | 138 | 	s64 rhz = lp->remote_hz << 6;	/* remote HZ << 6 */ | 
 | 139 | 	s64 m = 0; | 
 | 140 |  | 
 | 141 | 	/* not yet record reference time | 
 | 142 | 	 * go away!! record it before come back!! */ | 
 | 143 | 	if (lp->remote_ref_time == 0 || lp->local_ref_time == 0) | 
 | 144 | 		goto out; | 
 | 145 |  | 
 | 146 | 	/* we can't calc remote HZ with no different!! */ | 
 | 147 | 	if (tp->rx_opt.rcv_tsval == lp->remote_ref_time | 
 | 148 | 	    || tp->rx_opt.rcv_tsecr == lp->local_ref_time) | 
 | 149 | 		goto out; | 
 | 150 |  | 
 | 151 | 	m = HZ * (tp->rx_opt.rcv_tsval - | 
 | 152 | 		  lp->remote_ref_time) / (tp->rx_opt.rcv_tsecr - | 
 | 153 | 					  lp->local_ref_time); | 
 | 154 | 	if (m < 0) | 
 | 155 | 		m = -m; | 
 | 156 |  | 
| Wong Hoi Sing Edison | 3795da4 | 2006-09-13 20:30:30 -0700 | [diff] [blame] | 157 | 	if (rhz > 0) { | 
| Wong Hoi Sing Edison | 7c106d7 | 2006-06-05 17:27:58 -0700 | [diff] [blame] | 158 | 		m -= rhz >> 6;	/* m is now error in remote HZ est */ | 
 | 159 | 		rhz += m;	/* 63/64 old + 1/64 new */ | 
 | 160 | 	} else | 
 | 161 | 		rhz = m << 6; | 
 | 162 |  | 
| Wong Hoi Sing Edison | 7c106d7 | 2006-06-05 17:27:58 -0700 | [diff] [blame] | 163 |  out: | 
| Wong Hoi Sing Edison | 3795da4 | 2006-09-13 20:30:30 -0700 | [diff] [blame] | 164 | 	/* record time for successful remote HZ calc */ | 
| Wong Hoi Sing Edison | bfbea8a | 2006-09-28 14:40:21 -0700 | [diff] [blame] | 165 | 	if ((rhz >> 6) > 0) | 
| Wong Hoi Sing Edison | 3795da4 | 2006-09-13 20:30:30 -0700 | [diff] [blame] | 166 | 		lp->flag |= LP_VALID_RHZ; | 
 | 167 | 	else | 
 | 168 | 		lp->flag &= ~LP_VALID_RHZ; | 
 | 169 |  | 
| Wong Hoi Sing Edison | 7c106d7 | 2006-06-05 17:27:58 -0700 | [diff] [blame] | 170 | 	/* record reference time stamp */ | 
 | 171 | 	lp->remote_ref_time = tp->rx_opt.rcv_tsval; | 
 | 172 | 	lp->local_ref_time = tp->rx_opt.rcv_tsecr; | 
 | 173 |  | 
 | 174 | 	return rhz >> 6; | 
 | 175 | } | 
 | 176 |  | 
 | 177 | /** | 
 | 178 |  * tcp_lp_owd_calculator | 
 | 179 |  * | 
 | 180 |  * Calculate one way delay (in relative format). | 
 | 181 |  * Original implement OWD as minus of remote time difference to local time | 
 | 182 |  * difference directly. As this time difference just simply equal to RTT, when | 
 | 183 |  * the network status is stable, remote RTT will equal to local RTT, and result | 
 | 184 |  * OWD into zero. | 
 | 185 |  * It seems to be a bug and so we fixed it. | 
 | 186 |  */ | 
 | 187 | static u32 tcp_lp_owd_calculator(struct sock *sk) | 
 | 188 | { | 
 | 189 | 	struct tcp_sock *tp = tcp_sk(sk); | 
 | 190 | 	struct lp *lp = inet_csk_ca(sk); | 
 | 191 | 	s64 owd = 0; | 
 | 192 |  | 
 | 193 | 	lp->remote_hz = tcp_lp_remote_hz_estimator(sk); | 
 | 194 |  | 
 | 195 | 	if (lp->flag & LP_VALID_RHZ) { | 
 | 196 | 		owd = | 
 | 197 | 		    tp->rx_opt.rcv_tsval * (LP_RESOL / lp->remote_hz) - | 
 | 198 | 		    tp->rx_opt.rcv_tsecr * (LP_RESOL / HZ); | 
 | 199 | 		if (owd < 0) | 
 | 200 | 			owd = -owd; | 
 | 201 | 	} | 
 | 202 |  | 
 | 203 | 	if (owd > 0) | 
 | 204 | 		lp->flag |= LP_VALID_OWD; | 
 | 205 | 	else | 
 | 206 | 		lp->flag &= ~LP_VALID_OWD; | 
 | 207 |  | 
 | 208 | 	return owd; | 
 | 209 | } | 
 | 210 |  | 
 | 211 | /** | 
 | 212 |  * tcp_lp_rtt_sample | 
 | 213 |  * | 
 | 214 |  * Implementation or rtt_sample. | 
 | 215 |  * Will take the following action, | 
 | 216 |  *   1. calc OWD, | 
 | 217 |  *   2. record the min/max OWD, | 
 | 218 |  *   3. calc smoothed OWD (SOWD). | 
 | 219 |  * Most ideas come from the original TCP-LP implementation. | 
 | 220 |  */ | 
 | 221 | static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt) | 
 | 222 | { | 
 | 223 | 	struct lp *lp = inet_csk_ca(sk); | 
 | 224 | 	s64 mowd = tcp_lp_owd_calculator(sk); | 
 | 225 |  | 
 | 226 | 	/* sorry that we don't have valid data */ | 
 | 227 | 	if (!(lp->flag & LP_VALID_RHZ) || !(lp->flag & LP_VALID_OWD)) | 
 | 228 | 		return; | 
 | 229 |  | 
 | 230 | 	/* record the next min owd */ | 
 | 231 | 	if (mowd < lp->owd_min) | 
 | 232 | 		lp->owd_min = mowd; | 
 | 233 |  | 
 | 234 | 	/* always forget the max of the max | 
 | 235 | 	 * we just set owd_max as one below it */ | 
 | 236 | 	if (mowd > lp->owd_max) { | 
 | 237 | 		if (mowd > lp->owd_max_rsv) { | 
 | 238 | 			if (lp->owd_max_rsv == 0) | 
 | 239 | 				lp->owd_max = mowd; | 
 | 240 | 			else | 
 | 241 | 				lp->owd_max = lp->owd_max_rsv; | 
 | 242 | 			lp->owd_max_rsv = mowd; | 
 | 243 | 		} else | 
 | 244 | 			lp->owd_max = mowd; | 
 | 245 | 	} | 
 | 246 |  | 
 | 247 | 	/* calc for smoothed owd */ | 
 | 248 | 	if (lp->sowd != 0) { | 
 | 249 | 		mowd -= lp->sowd >> 3;	/* m is now error in owd est */ | 
 | 250 | 		lp->sowd += mowd;	/* owd = 7/8 owd + 1/8 new */ | 
 | 251 | 	} else | 
 | 252 | 		lp->sowd = mowd << 3;	/* take the measured time be owd */ | 
 | 253 | } | 
 | 254 |  | 
 | 255 | /** | 
 | 256 |  * tcp_lp_pkts_acked | 
 | 257 |  * | 
 | 258 |  * Implementation of pkts_acked. | 
 | 259 |  * Deal with active drop under Early Congestion Indication. | 
 | 260 |  * Only drop to half and 1 will be handle, because we hope to use back | 
 | 261 |  * newReno in increase case. | 
 | 262 |  * We work it out by following the idea from TCP-LP's paper directly | 
 | 263 |  */ | 
 | 264 | static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked) | 
 | 265 | { | 
 | 266 | 	struct tcp_sock *tp = tcp_sk(sk); | 
 | 267 | 	struct lp *lp = inet_csk_ca(sk); | 
 | 268 |  | 
 | 269 | 	/* calc inference */ | 
 | 270 | 	if (tcp_time_stamp > tp->rx_opt.rcv_tsecr) | 
 | 271 | 		lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr); | 
 | 272 |  | 
 | 273 | 	/* test if within inference */ | 
 | 274 | 	if (lp->last_drop && (tcp_time_stamp - lp->last_drop < lp->inference)) | 
 | 275 | 		lp->flag |= LP_WITHIN_INF; | 
 | 276 | 	else | 
 | 277 | 		lp->flag &= ~LP_WITHIN_INF; | 
 | 278 |  | 
 | 279 | 	/* test if within threshold */ | 
 | 280 | 	if (lp->sowd >> 3 < | 
 | 281 | 	    lp->owd_min + 15 * (lp->owd_max - lp->owd_min) / 100) | 
 | 282 | 		lp->flag |= LP_WITHIN_THR; | 
 | 283 | 	else | 
 | 284 | 		lp->flag &= ~LP_WITHIN_THR; | 
 | 285 |  | 
 | 286 | 	pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag, | 
 | 287 | 		 tp->snd_cwnd, lp->remote_hz, lp->owd_min, lp->owd_max, | 
 | 288 | 		 lp->sowd >> 3); | 
 | 289 |  | 
 | 290 | 	if (lp->flag & LP_WITHIN_THR) | 
 | 291 | 		return; | 
 | 292 |  | 
 | 293 | 	/* FIXME: try to reset owd_min and owd_max here | 
 | 294 | 	 * so decrease the chance the min/max is no longer suitable | 
 | 295 | 	 * and will usually within threshold when whithin inference */ | 
 | 296 | 	lp->owd_min = lp->sowd >> 3; | 
 | 297 | 	lp->owd_max = lp->sowd >> 2; | 
 | 298 | 	lp->owd_max_rsv = lp->sowd >> 2; | 
 | 299 |  | 
 | 300 | 	/* happened within inference | 
 | 301 | 	 * drop snd_cwnd into 1 */ | 
 | 302 | 	if (lp->flag & LP_WITHIN_INF) | 
 | 303 | 		tp->snd_cwnd = 1U; | 
 | 304 |  | 
 | 305 | 	/* happened after inference | 
 | 306 | 	 * cut snd_cwnd into half */ | 
 | 307 | 	else | 
 | 308 | 		tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U); | 
 | 309 |  | 
 | 310 | 	/* record this drop time */ | 
 | 311 | 	lp->last_drop = tcp_time_stamp; | 
 | 312 | } | 
 | 313 |  | 
 | 314 | static struct tcp_congestion_ops tcp_lp = { | 
 | 315 | 	.init = tcp_lp_init, | 
 | 316 | 	.ssthresh = tcp_reno_ssthresh, | 
 | 317 | 	.cong_avoid = tcp_lp_cong_avoid, | 
 | 318 | 	.min_cwnd = tcp_reno_min_cwnd, | 
 | 319 | 	.rtt_sample = tcp_lp_rtt_sample, | 
 | 320 | 	.pkts_acked = tcp_lp_pkts_acked, | 
 | 321 |  | 
 | 322 | 	.owner = THIS_MODULE, | 
 | 323 | 	.name = "lp" | 
 | 324 | }; | 
 | 325 |  | 
 | 326 | static int __init tcp_lp_register(void) | 
 | 327 | { | 
| Alexey Dobriyan | 2989697 | 2006-08-25 00:37:24 -0700 | [diff] [blame] | 328 | 	BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); | 
| Wong Hoi Sing Edison | 7c106d7 | 2006-06-05 17:27:58 -0700 | [diff] [blame] | 329 | 	return tcp_register_congestion_control(&tcp_lp); | 
 | 330 | } | 
 | 331 |  | 
 | 332 | static void __exit tcp_lp_unregister(void) | 
 | 333 | { | 
 | 334 | 	tcp_unregister_congestion_control(&tcp_lp); | 
 | 335 | } | 
 | 336 |  | 
 | 337 | module_init(tcp_lp_register); | 
 | 338 | module_exit(tcp_lp_unregister); | 
 | 339 |  | 
| Wong Hoi Sing Edison | 3795da4 | 2006-09-13 20:30:30 -0700 | [diff] [blame] | 340 | MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun Mike"); | 
| Wong Hoi Sing Edison | 7c106d7 | 2006-06-05 17:27:58 -0700 | [diff] [blame] | 341 | MODULE_LICENSE("GPL"); | 
 | 342 | MODULE_DESCRIPTION("TCP Low Priority"); |