| Aurelien Jacquiot | 09831ca | 2011-10-04 11:15:51 -0400 | [diff] [blame] | 1 | ; | 
|  | 2 | ;  linux/arch/c6x/lib/csum_64plus.s | 
|  | 3 | ; | 
|  | 4 | ;  Port to the Texas Instruments TMS320C6x architecture | 
|  | 5 | ; | 
|  | 6 | ;  Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated | 
|  | 7 | ;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) | 
|  | 8 | ; | 
|  | 9 | ;  This program is free software; you can redistribute it and/or modify | 
|  | 10 | ;  it under the terms of the GNU General Public License version 2 as | 
|  | 11 | ;  published by the Free Software Foundation. | 
|  | 12 | ; | 
|  | 13 | #include <linux/linkage.h> | 
|  | 14 |  | 
|  | 15 | ; | 
|  | 16 | ;unsigned int csum_partial_copy(const char *src, char * dst, | 
|  | 17 | ;				int len, int sum) | 
|  | 18 | ; | 
|  | 19 | ; A4:	src | 
|  | 20 | ; B4:	dst | 
|  | 21 | ; A6:	len | 
|  | 22 | ; B6:	sum | 
|  | 23 | ; return csum in A4 | 
|  | 24 | ; | 
|  | 25 |  | 
|  | 26 | .text | 
|  | 27 | ENTRY(csum_partial_copy) | 
|  | 28 | MVC	.S2	ILC,B30 | 
|  | 29 |  | 
|  | 30 | MV	.D1X	B6,A31		; given csum | 
|  | 31 | ZERO	.D1	A9		; csum (a side) | 
|  | 32 | ||	ZERO	.D2	B9		; csum (b side) | 
|  | 33 | ||	SHRU	.S2X	A6,2,B5		; len / 4 | 
|  | 34 |  | 
|  | 35 | ;; Check alignment and size | 
|  | 36 | AND	.S1	3,A4,A1 | 
|  | 37 | ||	AND	.S2	3,B4,B0 | 
|  | 38 | OR	.L2X	B0,A1,B0	; non aligned condition | 
|  | 39 | ||	MVC	.S2	B5,ILC | 
|  | 40 | ||	MVK	.D2	1,B2 | 
|  | 41 | ||	MV	.D1X	B5,A1		; words condition | 
|  | 42 | [!A1]	B	.S1	L8 | 
|  | 43 | [B0] BNOP	.S1	L6,5 | 
|  | 44 |  | 
|  | 45 | SPLOOP		1 | 
|  | 46 |  | 
|  | 47 | ;; Main loop for aligned words | 
|  | 48 | LDW	.D1T1	*A4++,A7 | 
|  | 49 | NOP	4 | 
|  | 50 | MV	.S2X	A7,B7 | 
|  | 51 | ||	EXTU	.S1	A7,0,16,A16 | 
|  | 52 | STW	.D2T2	B7,*B4++ | 
|  | 53 | ||	MPYU	.M2	B7,B2,B8 | 
|  | 54 | ||	ADD	.L1	A16,A9,A9 | 
|  | 55 | NOP | 
|  | 56 | SPKERNEL	8,0 | 
|  | 57 | ||	ADD	.L2	B8,B9,B9 | 
|  | 58 |  | 
|  | 59 | ZERO	.D1	A1 | 
|  | 60 | ||	ADD	.L1X	A9,B9,A9	;  add csum from a and b sides | 
|  | 61 |  | 
|  | 62 | L6: | 
|  | 63 | [!A1]	BNOP	.S1	L8,5 | 
|  | 64 |  | 
|  | 65 | ;; Main loop for non-aligned words | 
|  | 66 | SPLOOP		2 | 
|  | 67 | ||	MVK	.L1	1,A2 | 
|  | 68 |  | 
|  | 69 | LDNW	.D1T1	*A4++,A7 | 
|  | 70 | NOP		3 | 
|  | 71 |  | 
|  | 72 | NOP | 
|  | 73 | MV	.S2X	A7,B7 | 
|  | 74 | ||	EXTU	.S1	A7,0,16,A16 | 
|  | 75 | ||	MPYU	.M1	A7,A2,A8 | 
|  | 76 |  | 
|  | 77 | ADD	.L1	A16,A9,A9 | 
|  | 78 | SPKERNEL	6,0 | 
|  | 79 | ||	STNW	.D2T2	B7,*B4++ | 
|  | 80 | ||	ADD	.L1	A8,A9,A9 | 
|  | 81 |  | 
|  | 82 | L8:	AND	.S2X	2,A6,B5 | 
|  | 83 | CMPGT	.L2	B5,0,B0 | 
|  | 84 | [!B0]	BNOP	.S1	L82,4 | 
|  | 85 |  | 
|  | 86 | ;; Manage half-word | 
|  | 87 | ZERO	.L1	A7 | 
|  | 88 | ||	ZERO	.D1	A8 | 
|  | 89 |  | 
|  | 90 | #ifdef CONFIG_CPU_BIG_ENDIAN | 
|  | 91 |  | 
|  | 92 | LDBU	.D1T1	*A4++,A7 | 
|  | 93 | LDBU	.D1T1	*A4++,A8 | 
|  | 94 | NOP		3 | 
|  | 95 | SHL	.S1	A7,8,A0 | 
|  | 96 | ADD	.S1	A8,A9,A9 | 
|  | 97 | STB	.D2T1	A7,*B4++ | 
|  | 98 | ||	ADD	.S1	A0,A9,A9 | 
|  | 99 | STB	.D2T1	A8,*B4++ | 
|  | 100 |  | 
|  | 101 | #else | 
|  | 102 |  | 
|  | 103 | LDBU	.D1T1	*A4++,A7 | 
|  | 104 | LDBU	.D1T1	*A4++,A8 | 
|  | 105 | NOP		3 | 
|  | 106 | ADD	.S1	A7,A9,A9 | 
|  | 107 | SHL	.S1	A8,8,A0 | 
|  | 108 |  | 
|  | 109 | STB	.D2T1	A7,*B4++ | 
|  | 110 | ||	ADD	.S1	A0,A9,A9 | 
|  | 111 | STB	.D2T1	A8,*B4++ | 
|  | 112 |  | 
|  | 113 | #endif | 
|  | 114 |  | 
|  | 115 | ;; Manage eventually the last byte | 
|  | 116 | L82:	AND	.S2X	1,A6,B0 | 
|  | 117 | [!B0]	BNOP	.S1	L9,5 | 
|  | 118 |  | 
|  | 119 | ||	ZERO	.L1	A7 | 
|  | 120 |  | 
|  | 121 | L83:	LDBU	.D1T1	*A4++,A7 | 
|  | 122 | NOP		4 | 
|  | 123 |  | 
|  | 124 | MV	.L2X	A7,B7 | 
|  | 125 |  | 
|  | 126 | #ifdef CONFIG_CPU_BIG_ENDIAN | 
|  | 127 |  | 
|  | 128 | STB	.D2T2	B7,*B4++ | 
|  | 129 | ||	SHL	.S1	A7,8,A7 | 
|  | 130 | ADD	.S1	A7,A9,A9 | 
|  | 131 |  | 
|  | 132 | #else | 
|  | 133 |  | 
|  | 134 | STB	.D2T2	B7,*B4++ | 
|  | 135 | ||	ADD	.S1	A7,A9,A9 | 
|  | 136 |  | 
|  | 137 | #endif | 
|  | 138 |  | 
|  | 139 | ;; Fold the csum | 
|  | 140 | L9:	SHRU	.S2X	A9,16,B0 | 
|  | 141 | [!B0]	BNOP	.S1	L10,5 | 
|  | 142 |  | 
|  | 143 | L91:	SHRU	.S2X	A9,16,B4 | 
|  | 144 | ||	EXTU	.S1	A9,16,16,A3 | 
|  | 145 | ADD	.D1X	A3,B4,A9 | 
|  | 146 |  | 
|  | 147 | SHRU	.S1	A9,16,A0 | 
|  | 148 | [A0]	BNOP	.S1	L91,5 | 
|  | 149 |  | 
|  | 150 | L10:	ADD	.D1	A31,A9,A9 | 
|  | 151 | MV	.D1	A9,A4 | 
|  | 152 |  | 
|  | 153 | BNOP	.S2	B3,4 | 
|  | 154 | MVC	.S2	B30,ILC | 
|  | 155 | ENDPROC(csum_partial_copy) | 
|  | 156 |  | 
|  | 157 | ; | 
|  | 158 | ;unsigned short | 
|  | 159 | ;ip_fast_csum(unsigned char *iph, unsigned int ihl) | 
|  | 160 | ;{ | 
|  | 161 | ;	unsigned int checksum = 0; | 
|  | 162 | ;	unsigned short *tosum = (unsigned short *) iph; | 
|  | 163 | ;	int len; | 
|  | 164 | ; | 
|  | 165 | ;	len = ihl*4; | 
|  | 166 | ; | 
|  | 167 | ;	if (len <= 0) | 
|  | 168 | ;		return 0; | 
|  | 169 | ; | 
|  | 170 | ;	while(len) { | 
|  | 171 | ;		len -= 2; | 
|  | 172 | ;		checksum += *tosum++; | 
|  | 173 | ;	} | 
|  | 174 | ;	if (len & 1) | 
|  | 175 | ;		checksum += *(unsigned char*) tosum; | 
|  | 176 | ; | 
|  | 177 | ;	while(checksum >> 16) | 
|  | 178 | ;		checksum = (checksum & 0xffff) + (checksum >> 16); | 
|  | 179 | ; | 
|  | 180 | ;	return ~checksum; | 
|  | 181 | ;} | 
|  | 182 | ; | 
|  | 183 | ; A4:	iph | 
|  | 184 | ; B4:	ihl | 
|  | 185 | ; return checksum in A4 | 
|  | 186 | ; | 
|  | 187 | .text | 
|  | 188 |  | 
; ip_fast_csum: sum ihl * 4 bytes starting at iph as unsigned 16-bit
; half-words, fold the sum to 16 bits and return its complement.
;
; In:  A4 = iph, B4 = ihl (length in 32-bit words)
; Out: A4 = low 16 bits of the inverted folded sum, or 0 when ihl * 4 <= 0
;
; A5 is the accumulator. ILC is saved in B30 and written back before return.
|  | 189 | ENTRY(ip_fast_csum) | 
|  | 190 | ZERO	.D1	A5 | 
|  | 191 | ||	MVC	.S2	ILC,B30 | 
; B0 = byte length; B1 = (B0 > 0) as a signed comparison.
|  | 192 | SHL	.S2	B4,2,B0 | 
|  | 193 | CMPGT	.L2	B0,0,B1 | 
; Nothing to sum: branch to the exit at L15. The predicated ZERO fills the
; last branch delay slot and sets the return value (A3) to 0.
|  | 194 | [!B1] BNOP	.S1	L15,4 | 
|  | 195 | [!B1]	ZERO	.D1	A3 | 
|  | 196 |  | 
; B0 becomes the half-word count and is loaded into ILC; these instructions
; and the NOP 3 sit in the delay slots of the conditional branch.
; NOTE(review): B0 was just tested as > 0, so the [!B0] branch looks like it
; can never be taken on this path - confirm.
|  | 197 | [!B0]	B	.S1	L12 | 
|  | 198 | SHRU	.S2	B0,1,B0 | 
|  | 199 | MVC	.S2	B0,ILC | 
|  | 200 | NOP	3 | 
|  | 201 |  | 
; Hardware loop, ILC iterations: load one unsigned half-word and add it
; to A5.
|  | 202 | SPLOOP	1 | 
|  | 203 | LDHU	.D1T1	*A4++,A3 | 
|  | 204 | NOP	3 | 
|  | 205 | NOP | 
|  | 206 | SPKERNEL	5,0 | 
|  | 207 | ||	ADD	.L1	A3,A5,A5 | 
|  | 208 |  | 
; Skip the fold loop when the upper 16 bits of A5 are already zero.
|  | 209 | L12:	SHRU	.S1	A5,16,A0 | 
|  | 210 | [!A0]	BNOP	.S1	L14,5 | 
|  | 211 |  | 
; A5 = (A5 >> 16) + (A5 & 0xffff), repeated while the upper half is
; non-zero.
|  | 212 | L13:	SHRU	.S2X	A5,16,B4 | 
|  | 213 | EXTU	.S1	A5,16,16,A3 | 
|  | 214 | ADD	.D1X	A3,B4,A5 | 
|  | 215 | SHRU	.S1	A5,16,A0 | 
|  | 216 | [A0]	BNOP	.S1	L13,5 | 
|  | 217 |  | 
; Complement the sum and keep only its low 16 bits.
|  | 218 | L14:	NOT	.D1	A5,A3 | 
|  | 219 | EXTU	.S1	A3,16,16,A3 | 
|  | 220 |  | 
; Return through B3. BNOP covers three delay slots; the ILC write-back and
; the move of the result into A4 fill the remaining two.
|  | 221 | L15:	BNOP	.S2	B3,3 | 
|  | 222 | MVC	.S2	B30,ILC | 
|  | 223 | MV	.D1	A3,A4 | 
|  | 224 | ENDPROC(ip_fast_csum) | 
|  | 225 |  | 
|  | 226 | ; | 
|  | 227 | ;unsigned short | 
|  | 228 | ;do_csum(unsigned char *buff, unsigned int len) | 
|  | 229 | ;{ | 
|  | 230 | ;	int odd, count; | 
|  | 231 | ;	unsigned int result = 0; | 
|  | 232 | ; | 
|  | 233 | ;	if (len <= 0) | 
|  | 234 | ;		goto out; | 
|  | 235 | ;	odd = 1 & (unsigned long) buff; | 
|  | 236 | ;	if (odd) { | 
|  | 237 | ;#ifdef __LITTLE_ENDIAN | 
|  | 238 | ;		result += (*buff << 8); | 
|  | 239 | ;#else | 
|  | 240 | ;		result = *buff; | 
|  | 241 | ;#endif | 
|  | 242 | ;		len--; | 
|  | 243 | ;		buff++; | 
|  | 244 | ;	} | 
|  | 245 | ;	count = len >> 1;		/* nr of 16-bit words.. */ | 
|  | 246 | ;	if (count) { | 
|  | 247 | ;		if (2 & (unsigned long) buff) { | 
|  | 248 | ;			result += *(unsigned short *) buff; | 
|  | 249 | ;			count--; | 
|  | 250 | ;			len -= 2; | 
|  | 251 | ;			buff += 2; | 
|  | 252 | ;		} | 
|  | 253 | ;		count >>= 1;		/* nr of 32-bit words.. */ | 
|  | 254 | ;		if (count) { | 
|  | 255 | ;			unsigned int carry = 0; | 
|  | 256 | ;			do { | 
|  | 257 | ;				unsigned int w = *(unsigned int *) buff; | 
|  | 258 | ;				count--; | 
|  | 259 | ;				buff += 4; | 
|  | 260 | ;				result += carry; | 
|  | 261 | ;				result += w; | 
|  | 262 | ;				carry = (w > result); | 
|  | 263 | ;			} while (count); | 
|  | 264 | ;			result += carry; | 
|  | 265 | ;			result = (result & 0xffff) + (result >> 16); | 
|  | 266 | ;		} | 
|  | 267 | ;		if (len & 2) { | 
|  | 268 | ;			result += *(unsigned short *) buff; | 
|  | 269 | ;			buff += 2; | 
|  | 270 | ;		} | 
|  | 271 | ;	} | 
|  | 272 | ;	if (len & 1) | 
|  | 273 | ;#ifdef __LITTLE_ENDIAN | 
|  | 274 | ;		result += *buff; | 
|  | 275 | ;#else | 
|  | 276 | ;		result += (*buff << 8); | 
|  | 277 | ;#endif | 
|  | 278 | ;	result = (result & 0xffff) + (result >> 16); | 
|  | 279 | ;	/* add up carry.. */ | 
|  | 280 | ;	result = (result & 0xffff) + (result >> 16); | 
|  | 281 | ;	if (odd) | 
|  | 282 | ;		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); | 
|  | 283 | ;out: | 
|  | 284 | ;	return result; | 
|  | 285 | ;} | 
|  | 286 | ; | 
|  | 287 | ; A4:	buff | 
|  | 288 | ; B4:	len | 
|  | 289 | ; return checksum in A4 | 
|  | 290 | ; | 
|  | 291 |  | 
; do_csum: 16-bit folded sum of len bytes at buff, following the C
; reference in the comment block that precedes this routine.
;
; In:  A4 = buff, B4 = len
; Out: A4 = folded 16-bit sum (byte-swapped when buff started on an odd
;      address), or 0 when len <= 0
;
; Register roles inside the routine:
;   A1 = result            A3 = odd flag (bit 0 of the original buff)
;   A5 = return address    B3 = remaining len (copied from B4)
;   B0 = count / scratch   A0 = scratch, and carry inside the word loop
; A8 and A9 are not written, which csum_partial and ip_compute_csum in
; this file rely on.
|  | 292 | ENTRY(do_csum) | 
; len <= 0: branch to the exit at L26. The EXTU and the four-way packet
; below fill the last two branch delay slots, so A1 = 0 and A5 = B3 are
; set up before the exit code runs.
|  | 293 | CMPGT   .L2	   B4,0,B0 | 
|  | 294 | [!B0]   BNOP    .S1	   L26,3 | 
|  | 295 | EXTU    .S1	   A4,31,31,A0 | 
|  | 296 |  | 
|  | 297 | MV	   .L1	   A0,A3 | 
|  | 298 | ||	   MV	   .S1X    B3,A5 | 
|  | 299 | ||	   MV	   .L2	   B4,B3 | 
|  | 300 | ||	   ZERO    .D1	   A1 | 
|  | 301 |  | 
; Odd start address: consume one leading byte. Big endian loads it
; straight into the result; little endian loads it into A0 and stores it
; shifted left by 8. On little endian the SHL reads A0 in the packet right
; after the BNOP, so the code relies on the five BNOP cycles elapsing to
; cover the load latency when the branch is not taken.
|  | 302 | #ifdef CONFIG_CPU_BIG_ENDIAN | 
|  | 303 | [A0]    SUB	   .L2	   B3,1,B3 | 
|  | 304 | || [A0]    LDBU    .D1T1   *A4++,A1 | 
|  | 305 | #else | 
|  | 306 | [!A0]   BNOP    .S1	   L21,5 | 
|  | 307 | || [A0]    LDBU    .D1T1   *A4++,A0 | 
|  | 308 | SUB	   .L2	   B3,1,B3 | 
|  | 309 | ||	   SHL	   .S1	   A0,8,A1 | 
|  | 310 | L21: | 
|  | 311 | #endif | 
; B0 = number of 16-bit half-words left. None: go to the last-byte
; handling at L24. The MVK/AND pair fills the branch delay slots and
; computes A0 = buff & 2.
|  | 312 | SHR	   .S2	   B3,1,B0 | 
|  | 313 | [!B0]   BNOP    .S1	   L24,3 | 
|  | 314 | MVK	   .L1	   2,A0 | 
|  | 315 | AND	   .L1	   A4,A0,A0 | 
|  | 316 |  | 
; buff is 2-byte but not 4-byte aligned: add one leading half-word so the
; word loop starts on a 4-byte boundary, and adjust both counters.
|  | 317 | [!A0]   BNOP    .S1	   L22,5 | 
|  | 318 | || [A0]    LDHU    .D1T1   *A4++,A0 | 
|  | 319 | SUB	   .L2	   B0,1,B0 | 
|  | 320 | ||	   SUB	   .S2	   B3,2,B3 | 
|  | 321 | ||	   ADD	   .L1	   A0,A1,A1 | 
|  | 322 | L22: | 
; B0 = number of 32-bit words; A0 = 0 is the initial carry.
|  | 323 | SHR	   .S2	   B0,1,B0 | 
|  | 324 | ||	   ZERO    .L1	   A0 | 
|  | 325 |  | 
; No whole word: skip the loop. Otherwise the word count goes into ILC.
|  | 326 | [!B0]   BNOP    .S1	   L23,5 | 
|  | 327 | || [B0]    MVC	   .S2	   B0,ILC | 
|  | 328 |  | 
; Word loop: A2 is the running 32-bit result. Each iteration loads w into
; A1, computes A0 = carry + w and A2 += A0, then sets the new carry
; A0 = (w > A2) with an unsigned compare.
; NOTE(review): SPMASK L1 presumably keeps the MV on .L1 out of the loop
; body so that A2 is seeded from A1 only once - confirm against the SPMASK
; description in the CPU reference guide.
|  | 329 | SPLOOP  3 | 
|  | 330 | SPMASK  L1 | 
|  | 331 | ||	   MV	   .L1	   A1,A2 | 
|  | 332 | ||	   LDW	   .D1T1   *A4++,A1 | 
|  | 333 |  | 
|  | 334 | NOP	   4 | 
|  | 335 | ADD	   .L1	   A0,A1,A0 | 
|  | 336 | ADD	   .L1	   A2,A0,A2 | 
|  | 337 |  | 
|  | 338 | SPKERNEL 1,2 | 
|  | 339 | ||	   CMPGTU  .L1	   A1,A2,A0 | 
|  | 340 |  | 
; Add the last carry, then fold once: A1 = (A6 & 0xffff) + (A6 >> 16).
|  | 341 | ADD	   .L1	   A0,A2,A6 | 
|  | 342 | EXTU    .S1	   A6,16,16,A7 | 
|  | 343 | SHRU    .S2X    A6,16,B0 | 
|  | 344 | NOP		   1 | 
|  | 345 | ADD	   .L1X    A7,B0,A1 | 
|  | 346 | L23: | 
; Remaining half-word (len & 2): load it and add it to the result.
|  | 347 | MVK	   .L2	   2,B0 | 
|  | 348 | AND	   .L2	   B3,B0,B0 | 
|  | 349 | [B0]    LDHU    .D1T1   *A4++,A0 | 
|  | 350 | NOP	   4 | 
|  | 351 | [B0]    ADD	   .L1	   A0,A1,A1 | 
|  | 352 | L24: | 
; Remaining last byte (len & 1): added shifted left by 8 on big endian and
; unshifted on little endian.
|  | 353 | EXTU    .S2	   B3,31,31,B0 | 
|  | 354 | #ifdef CONFIG_CPU_BIG_ENDIAN | 
|  | 355 | [!B0]   BNOP    .S1	   L25,4 | 
|  | 356 | || [B0]    LDBU    .D1T1   *A4,A0 | 
|  | 357 | SHL	   .S1	   A0,8,A0 | 
|  | 358 | ADD	   .L1	   A0,A1,A1 | 
|  | 359 | L25: | 
|  | 360 | #else | 
|  | 361 | [B0]    LDBU    .D1T1   *A4,A0 | 
|  | 362 | NOP	   4 | 
|  | 363 | [B0]    ADD	   .L1	   A0,A1,A1 | 
|  | 364 | #endif | 
; Fold twice: A0 = (A1 & 0xffff) + (A1 >> 16), then A0 += A0 >> 16 and
; keep the low 16 bits in A1.
|  | 365 | EXTU    .S1	   A1,16,16,A0 | 
|  | 366 | SHRU    .S2X    A1,16,B0 | 
|  | 367 | NOP	   1 | 
|  | 368 | ADD	   .L1X    A0,B0,A0 | 
|  | 369 | SHRU    .S1	   A0,16,A1 | 
|  | 370 | ADD	   .L1	   A0,A1,A0 | 
|  | 371 | EXTU    .S1	   A0,16,16,A1 | 
; Byte swap for the odd-start case: A2 = (A1 >> 8) & 0xff and
; A0 = (A1 & 0xff) << 8; they replace A1 only when the saved odd flag
; (A3, moved to B0) is set.
|  | 372 | EXTU    .S1	   A1,16,24,A2 | 
|  | 373 |  | 
|  | 374 | EXTU    .S1	   A1,24,16,A0 | 
|  | 375 | ||	   MV	   .L2X    A3,B0 | 
|  | 376 |  | 
|  | 377 | [B0]    OR	   .L1	   A0,A2,A1 | 
|  | 378 | L26: | 
; Return to the address saved in A5. The MV into A4 follows BNOP A5,4 and
; executes in the remaining branch delay slot.
|  | 379 | NOP	   1 | 
|  | 380 | BNOP    .S2X    A5,4 | 
|  | 381 | MV	   .L1	   A1,A4 | 
|  | 382 | ENDPROC(do_csum) | 
|  | 383 |  | 
|  | 384 | ;__wsum csum_partial(const void *buff, int len, __wsum wsum) | 
|  | 385 | ;{ | 
|  | 386 | ;	unsigned int sum = (__force unsigned int)wsum; | 
|  | 387 | ;	unsigned int result = do_csum(buff, len); | 
|  | 388 | ; | 
|  | 389 | ;	/* add in old sum, and carry.. */ | 
|  | 390 | ;	result += sum; | 
|  | 391 | ;	if (sum > result) | 
|  | 392 | ;		result += 1; | 
|  | 393 | ;	return (__force __wsum)result; | 
|  | 394 | ;} | 
|  | 395 | ; | 
; csum_partial: call do_csum on (buff, len) and add wsum to its result
; with end-around carry.
;
; In:  A4 = buff, B4 = len, A6 = wsum (A4/B4 are passed through to do_csum)
; Out: A4 = do_csum result + wsum, plus 1 if that addition wrapped
|  | 396 | ENTRY(csum_partial) | 
; CALLP puts its own return address in B3, so the caller's B3 is saved in
; A9 first; wsum is parked in A8. do_csum as written in this file leaves
; A8 and A9 untouched.
|  | 397 | MV	   .L1X    B3,A9 | 
|  | 398 | ||	   CALLP   .S2	   do_csum,B3 | 
|  | 399 | ||	   MV	   .S1	   A6,A8 | 
; Return through A9. The three instructions after BNOP A9,2 run in the
; remaining branch delay slots: A1 = wsum + result, A0 = (wsum > A1) as
; the unsigned carry-out, A4 = A1 + A0.
|  | 400 | BNOP    .S2X    A9,2 | 
|  | 401 | ADD	   .L1	   A8,A4,A1 | 
|  | 402 | CMPGTU  .L1	   A8,A1,A0 | 
|  | 403 | ADD	   .L1	   A1,A0,A4 | 
|  | 404 | ENDPROC(csum_partial) | 
|  | 405 |  | 
|  | 406 | ;unsigned short | 
|  | 407 | ;ip_compute_csum(unsigned char *buff, unsigned int len) | 
|  | 408 | ; | 
|  | 409 | ; A4:	buff | 
|  | 410 | ; B4:	len | 
|  | 411 | ; return checksum in A4 | 
|  | 412 |  | 
; ip_compute_csum: call do_csum on (buff, len) and return the complement
; of its result, truncated to 16 bits.
;
; In:  A4 = buff, B4 = len (both passed through to do_csum unchanged)
; Out: A4 = ~do_csum(buff, len) with bits 16-31 cleared
|  | 413 | ENTRY(ip_compute_csum) | 
; CALLP puts its own return address in B3, so the caller's B3 is saved in
; A9 first. do_csum as written in this file leaves A9 untouched.
|  | 414 | MV	   .L1X    B3,A9 | 
|  | 415 | ||	   CALLP   .S2	   do_csum,B3 | 
; Return through A9. NOT and CLR follow BNOP A9,3 and run in the remaining
; two branch delay slots.
|  | 416 | BNOP    .S2X    A9,3 | 
|  | 417 | NOT	   .S1	   A4,A4 | 
|  | 418 | CLR     .S1	   A4,16,31,A4 | 
|  | 419 | ENDPROC(ip_compute_csum) | 