| Arnaldo Carvalho de Melo | 5324a04 | 2005-08-12 09:26:18 -0300 | [diff] [blame] | 1 | /* | 
 | 2 |  * INET		An implementation of the TCP/IP protocol suite for the LINUX | 
 | 3 |  *		operating system.  INET is implemented using the BSD Socket | 
 | 4 |  *		interface as the means of communication with the user level. | 
 | 5 |  * | 
 | 6 |  *		Generic INET6 transport hashtables | 
 | 7 |  * | 
| Arnaldo Carvalho de Melo | d8313f5 | 2005-12-13 23:25:44 -0800 | [diff] [blame] | 8 |  * Authors:	Lotsa people, from code originally in tcp, generalised here | 
 | 9 |  * 		by Arnaldo Carvalho de Melo <acme@mandriva.com> | 
| Arnaldo Carvalho de Melo | 5324a04 | 2005-08-12 09:26:18 -0300 | [diff] [blame] | 10 |  * | 
 | 11 |  *	This program is free software; you can redistribute it and/or | 
 | 12 |  *      modify it under the terms of the GNU General Public License | 
 | 13 |  *      as published by the Free Software Foundation; either version | 
 | 14 |  *      2 of the License, or (at your option) any later version. | 
 | 15 |  */ | 
 | 16 |  | 
 | 17 | #include <linux/config.h> | 
| Arnaldo Carvalho de Melo | 5324a04 | 2005-08-12 09:26:18 -0300 | [diff] [blame] | 18 | #include <linux/module.h> | 
| Arnaldo Carvalho de Melo | d8313f5 | 2005-12-13 23:25:44 -0800 | [diff] [blame] | 19 | #include <linux/random.h> | 
| Arnaldo Carvalho de Melo | 5324a04 | 2005-08-12 09:26:18 -0300 | [diff] [blame] | 20 |  | 
 | 21 | #include <net/inet_connection_sock.h> | 
 | 22 | #include <net/inet_hashtables.h> | 
 | 23 | #include <net/inet6_hashtables.h> | 
| Arnaldo Carvalho de Melo | d8313f5 | 2005-12-13 23:25:44 -0800 | [diff] [blame] | 24 | #include <net/ip.h> | 
| Arnaldo Carvalho de Melo | 5324a04 | 2005-08-12 09:26:18 -0300 | [diff] [blame] | 25 |  | 
| Denis Vlasenko | b1a7ffc | 2006-04-09 22:48:59 -0700 | [diff] [blame] | 26 | void __inet6_hash(struct inet_hashinfo *hashinfo, | 
 | 27 | 				struct sock *sk) | 
 | 28 | { | 
 | 29 | 	struct hlist_head *list; | 
 | 30 | 	rwlock_t *lock; | 
 | 31 |  | 
 | 32 | 	BUG_TRAP(sk_unhashed(sk)); | 
 | 33 |  | 
 | 34 | 	if (sk->sk_state == TCP_LISTEN) { | 
 | 35 | 		list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; | 
 | 36 | 		lock = &hashinfo->lhash_lock; | 
 | 37 | 		inet_listen_wlock(hashinfo); | 
 | 38 | 	} else { | 
 | 39 | 		unsigned int hash; | 
 | 40 | 		sk->sk_hash = hash = inet6_sk_ehashfn(sk); | 
 | 41 | 		hash &= (hashinfo->ehash_size - 1); | 
 | 42 | 		list = &hashinfo->ehash[hash].chain; | 
 | 43 | 		lock = &hashinfo->ehash[hash].lock; | 
 | 44 | 		write_lock(lock); | 
 | 45 | 	} | 
 | 46 |  | 
 | 47 | 	__sk_add_node(sk, list); | 
 | 48 | 	sock_prot_inc_use(sk->sk_prot); | 
 | 49 | 	write_unlock(lock); | 
 | 50 | } | 
 | 51 | EXPORT_SYMBOL(__inet6_hash); | 
 | 52 |  | 
 | 53 | /* | 
 | 54 |  * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so | 
 | 55 |  * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM | 
 | 56 |  * | 
 | 57 |  * The sockhash lock must be held as a reader here. | 
 | 58 |  */ | 
 | 59 | struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, | 
 | 60 | 					   const struct in6_addr *saddr, | 
 | 61 | 					   const u16 sport, | 
 | 62 | 					   const struct in6_addr *daddr, | 
 | 63 | 					   const u16 hnum, | 
 | 64 | 					   const int dif) | 
 | 65 | { | 
 | 66 | 	struct sock *sk; | 
 | 67 | 	const struct hlist_node *node; | 
 | 68 | 	const __u32 ports = INET_COMBINED_PORTS(sport, hnum); | 
 | 69 | 	/* Optimize here for direct hit, only listening connections can | 
 | 70 | 	 * have wildcards anyways. | 
 | 71 | 	 */ | 
 | 72 | 	unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport); | 
 | 73 | 	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); | 
 | 74 |  | 
 | 75 | 	prefetch(head->chain.first); | 
 | 76 | 	read_lock(&head->lock); | 
 | 77 | 	sk_for_each(sk, node, &head->chain) { | 
 | 78 | 		/* For IPV6 do the cheaper port and family tests first. */ | 
 | 79 | 		if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif)) | 
 | 80 | 			goto hit; /* You sunk my battleship! */ | 
 | 81 | 	} | 
 | 82 | 	/* Must check for a TIME_WAIT'er before going to listener hash. */ | 
 | 83 | 	sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { | 
 | 84 | 		const struct inet_timewait_sock *tw = inet_twsk(sk); | 
 | 85 |  | 
 | 86 | 		if(*((__u32 *)&(tw->tw_dport))	== ports	&& | 
 | 87 | 		   sk->sk_family		== PF_INET6) { | 
 | 88 | 			const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); | 
 | 89 |  | 
 | 90 | 			if (ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	&& | 
 | 91 | 			    ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr)	&& | 
 | 92 | 			    (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) | 
 | 93 | 				goto hit; | 
 | 94 | 		} | 
 | 95 | 	} | 
 | 96 | 	read_unlock(&head->lock); | 
 | 97 | 	return NULL; | 
 | 98 |  | 
 | 99 | hit: | 
 | 100 | 	sock_hold(sk); | 
 | 101 | 	read_unlock(&head->lock); | 
 | 102 | 	return sk; | 
 | 103 | } | 
 | 104 | EXPORT_SYMBOL(__inet6_lookup_established); | 
 | 105 |  | 
| Arnaldo Carvalho de Melo | 5324a04 | 2005-08-12 09:26:18 -0300 | [diff] [blame] | 106 | struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, | 
 | 107 | 				   const struct in6_addr *daddr, | 
 | 108 | 				   const unsigned short hnum, const int dif) | 
 | 109 | { | 
 | 110 | 	struct sock *sk; | 
 | 111 | 	const struct hlist_node *node; | 
 | 112 | 	struct sock *result = NULL; | 
 | 113 | 	int score, hiscore = 0; | 
 | 114 |  | 
 | 115 | 	read_lock(&hashinfo->lhash_lock); | 
 | 116 | 	sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { | 
 | 117 | 		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { | 
 | 118 | 			const struct ipv6_pinfo *np = inet6_sk(sk); | 
 | 119 | 			 | 
 | 120 | 			score = 1; | 
 | 121 | 			if (!ipv6_addr_any(&np->rcv_saddr)) { | 
 | 122 | 				if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) | 
 | 123 | 					continue; | 
 | 124 | 				score++; | 
 | 125 | 			} | 
 | 126 | 			if (sk->sk_bound_dev_if) { | 
 | 127 | 				if (sk->sk_bound_dev_if != dif) | 
 | 128 | 					continue; | 
 | 129 | 				score++; | 
 | 130 | 			} | 
 | 131 | 			if (score == 3) { | 
 | 132 | 				result = sk; | 
 | 133 | 				break; | 
 | 134 | 			} | 
 | 135 | 			if (score > hiscore) { | 
 | 136 | 				hiscore = score; | 
 | 137 | 				result = sk; | 
 | 138 | 			} | 
 | 139 | 		} | 
 | 140 | 	} | 
 | 141 | 	if (result) | 
 | 142 | 		sock_hold(result); | 
 | 143 | 	read_unlock(&hashinfo->lhash_lock); | 
 | 144 | 	return result; | 
 | 145 | } | 
 | 146 |  | 
 | 147 | EXPORT_SYMBOL_GPL(inet6_lookup_listener); | 
 | 148 |  | 
 | 149 | struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, | 
 | 150 | 			  const struct in6_addr *saddr, const u16 sport, | 
 | 151 | 			  const struct in6_addr *daddr, const u16 dport, | 
 | 152 | 			  const int dif) | 
 | 153 | { | 
 | 154 | 	struct sock *sk; | 
 | 155 |  | 
 | 156 | 	local_bh_disable(); | 
 | 157 | 	sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); | 
 | 158 | 	local_bh_enable(); | 
 | 159 |  | 
 | 160 | 	return sk; | 
 | 161 | } | 
 | 162 |  | 
 | 163 | EXPORT_SYMBOL_GPL(inet6_lookup); | 
| Arnaldo Carvalho de Melo | d8313f5 | 2005-12-13 23:25:44 -0800 | [diff] [blame] | 164 |  | 
 | 165 | static int __inet6_check_established(struct inet_timewait_death_row *death_row, | 
 | 166 | 				     struct sock *sk, const __u16 lport, | 
 | 167 | 				     struct inet_timewait_sock **twp) | 
 | 168 | { | 
 | 169 | 	struct inet_hashinfo *hinfo = death_row->hashinfo; | 
| Herbert Xu | 3759fa9 | 2006-03-13 14:26:12 -0800 | [diff] [blame] | 170 | 	struct inet_sock *inet = inet_sk(sk); | 
| Arnaldo Carvalho de Melo | d8313f5 | 2005-12-13 23:25:44 -0800 | [diff] [blame] | 171 | 	const struct ipv6_pinfo *np = inet6_sk(sk); | 
 | 172 | 	const struct in6_addr *daddr = &np->rcv_saddr; | 
 | 173 | 	const struct in6_addr *saddr = &np->daddr; | 
 | 174 | 	const int dif = sk->sk_bound_dev_if; | 
 | 175 | 	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); | 
 | 176 | 	const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, | 
 | 177 | 						inet->dport); | 
 | 178 | 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); | 
 | 179 | 	struct sock *sk2; | 
 | 180 | 	const struct hlist_node *node; | 
 | 181 | 	struct inet_timewait_sock *tw; | 
 | 182 |  | 
 | 183 | 	prefetch(head->chain.first); | 
 | 184 | 	write_lock(&head->lock); | 
 | 185 |  | 
 | 186 | 	/* Check TIME-WAIT sockets first. */ | 
 | 187 | 	sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) { | 
 | 188 | 		const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2); | 
 | 189 |  | 
 | 190 | 		tw = inet_twsk(sk2); | 
 | 191 |  | 
 | 192 | 		if(*((__u32 *)&(tw->tw_dport)) == ports		 && | 
 | 193 | 		   sk2->sk_family	       == PF_INET6	 && | 
 | 194 | 		   ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	 && | 
 | 195 | 		   ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && | 
 | 196 | 		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { | 
 | 197 | 			if (twsk_unique(sk, sk2, twp)) | 
 | 198 | 				goto unique; | 
 | 199 | 			else | 
 | 200 | 				goto not_unique; | 
 | 201 | 		} | 
 | 202 | 	} | 
 | 203 | 	tw = NULL; | 
 | 204 |  | 
 | 205 | 	/* And established part... */ | 
 | 206 | 	sk_for_each(sk2, node, &head->chain) { | 
 | 207 | 		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) | 
 | 208 | 			goto not_unique; | 
 | 209 | 	} | 
 | 210 |  | 
 | 211 | unique: | 
| Herbert Xu | 3759fa9 | 2006-03-13 14:26:12 -0800 | [diff] [blame] | 212 | 	/* Must record num and sport now. Otherwise we will see | 
 | 213 | 	 * in hash table socket with a funny identity. */ | 
 | 214 | 	inet->num = lport; | 
 | 215 | 	inet->sport = htons(lport); | 
| Arnaldo Carvalho de Melo | d8313f5 | 2005-12-13 23:25:44 -0800 | [diff] [blame] | 216 | 	BUG_TRAP(sk_unhashed(sk)); | 
 | 217 | 	__sk_add_node(sk, &head->chain); | 
 | 218 | 	sk->sk_hash = hash; | 
 | 219 | 	sock_prot_inc_use(sk->sk_prot); | 
 | 220 | 	write_unlock(&head->lock); | 
 | 221 |  | 
 | 222 | 	if (twp != NULL) { | 
 | 223 | 		*twp = tw; | 
 | 224 | 		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | 
 | 225 | 	} else if (tw != NULL) { | 
 | 226 | 		/* Silly. Should hash-dance instead... */ | 
 | 227 | 		inet_twsk_deschedule(tw, death_row); | 
 | 228 | 		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | 
 | 229 |  | 
 | 230 | 		inet_twsk_put(tw); | 
 | 231 | 	} | 
 | 232 | 	return 0; | 
 | 233 |  | 
 | 234 | not_unique: | 
 | 235 | 	write_unlock(&head->lock); | 
 | 236 | 	return -EADDRNOTAVAIL; | 
 | 237 | } | 
 | 238 |  | 
 | 239 | static inline u32 inet6_sk_port_offset(const struct sock *sk) | 
 | 240 | { | 
 | 241 | 	const struct inet_sock *inet = inet_sk(sk); | 
 | 242 | 	const struct ipv6_pinfo *np = inet6_sk(sk); | 
 | 243 | 	return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32, | 
 | 244 | 					  np->daddr.s6_addr32, | 
 | 245 | 					  inet->dport); | 
 | 246 | } | 
 | 247 |  | 
 | 248 | int inet6_hash_connect(struct inet_timewait_death_row *death_row, | 
 | 249 | 		       struct sock *sk) | 
 | 250 | { | 
 | 251 | 	struct inet_hashinfo *hinfo = death_row->hashinfo; | 
 | 252 | 	const unsigned short snum = inet_sk(sk)->num; | 
 | 253 |  	struct inet_bind_hashbucket *head; | 
 | 254 |  	struct inet_bind_bucket *tb; | 
 | 255 | 	int ret; | 
 | 256 |  | 
 | 257 |  	if (snum == 0) { | 
 | 258 |  		const int low = sysctl_local_port_range[0]; | 
 | 259 |  		const int high = sysctl_local_port_range[1]; | 
 | 260 | 		const int range = high - low; | 
 | 261 |  		int i, port; | 
 | 262 | 		static u32 hint; | 
 | 263 | 		const u32 offset = hint + inet6_sk_port_offset(sk); | 
 | 264 | 		struct hlist_node *node; | 
 | 265 |  		struct inet_timewait_sock *tw = NULL; | 
 | 266 |  | 
 | 267 |  		local_bh_disable(); | 
 | 268 | 		for (i = 1; i <= range; i++) { | 
 | 269 | 			port = low + (i + offset) % range; | 
 | 270 |  			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; | 
 | 271 |  			spin_lock(&head->lock); | 
 | 272 |  | 
 | 273 |  			/* Does not bother with rcv_saddr checks, | 
 | 274 |  			 * because the established check is already | 
 | 275 |  			 * unique enough. | 
 | 276 |  			 */ | 
 | 277 | 			inet_bind_bucket_for_each(tb, node, &head->chain) { | 
 | 278 |  				if (tb->port == port) { | 
 | 279 |  					BUG_TRAP(!hlist_empty(&tb->owners)); | 
 | 280 |  					if (tb->fastreuse >= 0) | 
 | 281 |  						goto next_port; | 
 | 282 |  					if (!__inet6_check_established(death_row, | 
 | 283 | 								       sk, port, | 
 | 284 | 								       &tw)) | 
 | 285 |  						goto ok; | 
 | 286 |  					goto next_port; | 
 | 287 |  				} | 
 | 288 |  			} | 
 | 289 |  | 
 | 290 |  			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, | 
 | 291 | 						     head, port); | 
 | 292 |  			if (!tb) { | 
 | 293 |  				spin_unlock(&head->lock); | 
 | 294 |  				break; | 
 | 295 |  			} | 
 | 296 |  			tb->fastreuse = -1; | 
 | 297 |  			goto ok; | 
 | 298 |  | 
 | 299 |  		next_port: | 
 | 300 |  			spin_unlock(&head->lock); | 
 | 301 |  		} | 
 | 302 |  		local_bh_enable(); | 
 | 303 |  | 
 | 304 |  		return -EADDRNOTAVAIL; | 
 | 305 |  | 
 | 306 | ok: | 
 | 307 | 		hint += i; | 
 | 308 |  | 
 | 309 |  		/* Head lock still held and bh's disabled */ | 
 | 310 |  		inet_bind_hash(sk, tb, port); | 
 | 311 | 		if (sk_unhashed(sk)) { | 
 | 312 |  			inet_sk(sk)->sport = htons(port); | 
 | 313 |  			__inet6_hash(hinfo, sk); | 
 | 314 |  		} | 
 | 315 |  		spin_unlock(&head->lock); | 
 | 316 |  | 
 | 317 |  		if (tw) { | 
 | 318 |  			inet_twsk_deschedule(tw, death_row); | 
 | 319 |  			inet_twsk_put(tw); | 
 | 320 |  		} | 
 | 321 |  | 
 | 322 | 		ret = 0; | 
 | 323 | 		goto out; | 
 | 324 |  	} | 
 | 325 |  | 
 | 326 |  	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; | 
 | 327 |  	tb   = inet_csk(sk)->icsk_bind_hash; | 
 | 328 | 	spin_lock_bh(&head->lock); | 
 | 329 |  | 
 | 330 | 	if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { | 
 | 331 | 		__inet6_hash(hinfo, sk); | 
 | 332 | 		spin_unlock_bh(&head->lock); | 
 | 333 | 		return 0; | 
 | 334 | 	} else { | 
 | 335 | 		spin_unlock(&head->lock); | 
 | 336 | 		/* No definite answer... Walk to established hash table */ | 
 | 337 | 		ret = __inet6_check_established(death_row, sk, snum, NULL); | 
 | 338 | out: | 
 | 339 | 		local_bh_enable(); | 
 | 340 | 		return ret; | 
 | 341 | 	} | 
 | 342 | } | 
 | 343 |  | 
 | 344 | EXPORT_SYMBOL_GPL(inet6_hash_connect); |