/*
 * arch/x86_64/lib/csum-partial.c
 *
 * This file contains network checksum routines that are better done
 * in an architecture-specific manner due to speed.
 */
 | 7 |   | 
 | 8 | #include <linux/compiler.h> | 
 | 9 | #include <linux/module.h> | 
 | 10 | #include <asm/checksum.h> | 
 | 11 |  | 
/*
 * Fold a 32-bit value into 16 bits: add the upper and lower halves and
 * wrap any carry out of bit 15 back into the low bit (end-around carry).
 */
static inline unsigned short from32to16(unsigned a)
{
	unsigned folded = (a >> 16) + (a & 0xffff);

	/* At most one carry can come out of the first addition. */
	folded = (folded & 0xffff) + (folded >> 16);
	return (unsigned short)folded;
}
 | 21 |  | 
 | 22 | /* | 
 | 23 |  * Do a 64-bit checksum on an arbitrary memory area. | 
 | 24 |  * Returns a 32bit checksum. | 
 | 25 |  * | 
 | 26 |  * This isn't as time critical as it used to be because many NICs | 
 | 27 |  * do hardware checksumming these days. | 
 | 28 |  *  | 
 | 29 |  * Things tried and found to not make it faster: | 
 | 30 |  * Manual Prefetching | 
 | 31 |  * Unrolling to an 128 bytes inner loop. | 
 | 32 |  * Using interleaving with more registers to break the carry chains. | 
 | 33 |  */ | 
| Andi Kleen | b6bcc4b | 2006-12-07 02:14:07 +0100 | [diff] [blame] | 34 | static unsigned do_csum(const unsigned char *buff, unsigned len) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 35 | { | 
 | 36 | 	unsigned odd, count; | 
 | 37 | 	unsigned long result = 0; | 
 | 38 |  | 
 | 39 | 	if (unlikely(len == 0)) | 
 | 40 | 		return result;  | 
 | 41 | 	odd = 1 & (unsigned long) buff; | 
 | 42 | 	if (unlikely(odd)) { | 
 | 43 | 		result = *buff << 8; | 
 | 44 | 		len--; | 
 | 45 | 		buff++; | 
 | 46 | 	} | 
 | 47 | 	count = len >> 1;		/* nr of 16-bit words.. */ | 
 | 48 | 	if (count) { | 
 | 49 | 		if (2 & (unsigned long) buff) { | 
 | 50 | 			result += *(unsigned short *)buff; | 
 | 51 | 			count--; | 
 | 52 | 			len -= 2; | 
 | 53 | 			buff += 2; | 
 | 54 | 		} | 
 | 55 | 		count >>= 1;		/* nr of 32-bit words.. */ | 
 | 56 | 		if (count) { | 
 | 57 | 			unsigned long zero; | 
 | 58 | 			unsigned count64; | 
 | 59 | 			if (4 & (unsigned long) buff) { | 
 | 60 | 				result += *(unsigned int *) buff; | 
 | 61 | 				count--; | 
 | 62 | 				len -= 4; | 
 | 63 | 				buff += 4; | 
 | 64 | 			} | 
 | 65 | 			count >>= 1;	/* nr of 64-bit words.. */ | 
 | 66 |  | 
 | 67 | 			/* main loop using 64byte blocks */ | 
 | 68 | 			zero = 0; | 
 | 69 | 			count64 = count >> 3; | 
 | 70 | 			while (count64) {  | 
 | 71 | 				asm("addq 0*8(%[src]),%[res]\n\t" | 
 | 72 | 				    "adcq 1*8(%[src]),%[res]\n\t" | 
 | 73 | 				    "adcq 2*8(%[src]),%[res]\n\t" | 
 | 74 | 				    "adcq 3*8(%[src]),%[res]\n\t" | 
 | 75 | 				    "adcq 4*8(%[src]),%[res]\n\t" | 
 | 76 | 				    "adcq 5*8(%[src]),%[res]\n\t" | 
 | 77 | 				    "adcq 6*8(%[src]),%[res]\n\t" | 
 | 78 | 				    "adcq 7*8(%[src]),%[res]\n\t" | 
 | 79 | 				    "adcq %[zero],%[res]" | 
 | 80 | 				    : [res] "=r" (result) | 
 | 81 | 				    : [src] "r" (buff), [zero] "r" (zero), | 
 | 82 | 				    "[res]" (result)); | 
 | 83 | 				buff += 64; | 
 | 84 | 				count64--; | 
 | 85 | 			} | 
 | 86 |  | 
 | 87 | 			/* last upto 7 8byte blocks */ | 
 | 88 | 			count %= 8;  | 
 | 89 | 			while (count) {  | 
 | 90 | 				asm("addq %1,%0\n\t" | 
 | 91 | 				    "adcq %2,%0\n"  | 
 | 92 | 					    : "=r" (result) | 
 | 93 | 				    : "m" (*(unsigned long *)buff),  | 
 | 94 | 				    "r" (zero),  "0" (result)); | 
 | 95 | 				--count;  | 
 | 96 | 					buff += 8; | 
 | 97 | 			} | 
 | 98 | 			result = add32_with_carry(result>>32, | 
 | 99 | 						  result&0xffffffff);  | 
 | 100 |  | 
 | 101 | 			if (len & 4) { | 
 | 102 | 				result += *(unsigned int *) buff; | 
 | 103 | 				buff += 4; | 
 | 104 | 			} | 
 | 105 | 		} | 
 | 106 | 		if (len & 2) { | 
 | 107 | 			result += *(unsigned short *) buff; | 
 | 108 | 			buff += 2; | 
 | 109 | 		} | 
 | 110 | 	} | 
 | 111 | 	if (len & 1) | 
 | 112 | 		result += *buff; | 
 | 113 | 	result = add32_with_carry(result>>32, result & 0xffffffff);  | 
 | 114 | 	if (unlikely(odd)) {  | 
 | 115 | 		result = from32to16(result); | 
 | 116 | 		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); | 
 | 117 | 	} | 
 | 118 | 	return result; | 
 | 119 | } | 
 | 120 |  | 
 | 121 | /* | 
 | 122 |  * computes the checksum of a memory block at buff, length len, | 
 | 123 |  * and adds in "sum" (32-bit) | 
 | 124 |  * | 
 | 125 |  * returns a 32-bit number suitable for feeding into itself | 
 | 126 |  * or csum_tcpudp_magic | 
 | 127 |  * | 
 | 128 |  * this function must be called with even lengths, except | 
 | 129 |  * for the last fragment, which may be odd | 
 | 130 |  * | 
 | 131 |  * it's best to have buff aligned on a 64-bit boundary | 
 | 132 |  */ | 
| Al Viro | a4f89fb | 2006-11-14 21:20:08 -0800 | [diff] [blame] | 133 | __wsum csum_partial(const void *buff, int len, __wsum sum) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 134 | { | 
| Al Viro | a4f89fb | 2006-11-14 21:20:08 -0800 | [diff] [blame] | 135 | 	return (__force __wsum)add32_with_carry(do_csum(buff, len), | 
 | 136 | 						(__force u32)sum); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 137 | } | 
 | 138 |  | 
 | 139 | EXPORT_SYMBOL(csum_partial); | 
 | 140 |  | 
 | 141 | /* | 
 | 142 |  * this routine is used for miscellaneous IP-like checksums, mainly | 
 | 143 |  * in icmp.c | 
 | 144 |  */ | 
| Al Viro | a4f89fb | 2006-11-14 21:20:08 -0800 | [diff] [blame] | 145 | __sum16 ip_compute_csum(const void *buff, int len) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 146 | { | 
 | 147 | 	return csum_fold(csum_partial(buff,len,0)); | 
 | 148 | } | 
| Andi Kleen | 2ee60e17 | 2006-06-26 13:59:44 +0200 | [diff] [blame] | 149 | EXPORT_SYMBOL(ip_compute_csum); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 150 |  |