/* Optimised simple memory checksum
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */
#include <asm/cache.h>

	.section .text
	.balign	L1_CACHE_BYTES

###############################################################################
#
# unsigned int do_csum(const unsigned char *buff, int len)
#
# Sums len bytes starting at buff as 32-bit words with end-around carry,
# then folds the 32-bit total down to 16 bits.  A zero or negative len
# yields 0.
#
# On entry:   d0 = buff, d1 = len
# On return:  d0 = folded 16-bit sum (bytes swapped when buff was odd)
# Registers:  d1 = 32-bit accumulator
#             d2 = bytes remaining
#             d3 = loop stride (32), reused as scratch for the last byte
#             a0 = read cursor, a1 = copy of the original buff
#             d0, e0, e1, e3 = load scratch
# Stack:      d2 and d3 are pushed by movm on entry (8 bytes) and popped
#             again by the final ret
#
# NOTE(review): e2 is skipped as a scratch register; the reason is not
# visible in this file - confirm before reusing it here.
#
###############################################################################
	.globl	do_csum
	.type	do_csum,@function
do_csum:
	movm	[d2,d3],(sp)			# save d2/d3; the final ret restores them
	mov	d1,d2				# count
	mov	d0,a0				# buff
	mov	a0,a1				# keep the start address for the exit fixup
	clr	d1				# accumulator

	cmp	+0,d2
	ble	do_csum_done			# check for zero length or negative

	# 4-byte align the buffer pointer
	btst	+3,a0				# both low address bits clear?
	beq	do_csum_now_4b_aligned

	btst	+1,a0				# bit 0 clear means an even address
	beq	do_csum_addr_not_odd
	movbu	(a0),d0				# odd start: take the single leading byte
	inc	a0
	asl	+8,d0				# goes in bits 15:8; result is byte-swapped at exit
	add	d0,d1
	add	-1,d2

do_csum_addr_not_odd:
	cmp	+2,d2
	bcs	do_csum_fewer_than_4		# under 2 bytes left: go to the byte tail
	btst	+2,a0				# bit 1 clear means already 4-byte aligned
	beq	do_csum_now_4b_aligned
	movhu	(a0+),d0			# consume one halfword to reach 4-byte alignment
	add	d0,d1
	add	-2,d2
	cmp	+4,d2
	bcs	do_csum_fewer_than_4		# no whole word left

do_csum_now_4b_aligned:
	# we want to checksum as much as we can in chunks of 32 bytes
	cmp	+31,d2
	bls	do_csum_remainder		# 4-byte aligned remainder

	# bias the count by -32 so that a borrow from the sub at the bottom
	# of the loop marks the point where fewer than 32 bytes remain
	add	-32,d2
	mov	+32,d3

do_csum_loop:
	# eight words per pass; each add/addc feeds its carry to the next
	# addc, and the chain continues across the second group of loads
	mov	(a0+),d0
	mov	(a0+),e0
	mov	(a0+),e1
	mov	(a0+),e3
	add	d0,d1
	addc	e0,d1
	addc	e1,d1
	addc	e3,d1
	mov	(a0+),d0
	mov	(a0+),e0
	mov	(a0+),e1
	mov	(a0+),e3
	addc	d0,d1
	addc	e0,d1
	addc	e1,d1
	addc	e3,d1
	addc	+0,d1				# fold the last carry back into the sum

	sub	d3,d2
	bcc	do_csum_loop			# no borrow: at least 32 more bytes to go

	add	d3,d2				# undo the bias: d2 = 0..31 bytes left
	beq	do_csum_done

do_csum_remainder:
	# cut 16-31 bytes down to 0-15
	cmp	+16,d2
	bcs	do_csum_fewer_than_16
	mov	(a0+),d0
	mov	(a0+),e0
	mov	(a0+),e1
	mov	(a0+),e3
	add	d0,d1
	addc	e0,d1
	addc	e1,d1
	addc	e3,d1
	addc	+0,d1				# fold the last carry back into the sum
	add	-16,d2
	beq	do_csum_done

do_csum_fewer_than_16:
	# add in the remaining whole words: enter the fall-through chain
	# below at the point matching the number of words left (3, 2 or 1).
	# d2 is not decremented here; it is masked to the byte tail afterwards
	cmp	+4,d2
	bcs	do_csum_fewer_than_4
	cmp	+8,d2
	bcs	do_csum_one_word
	cmp	+12,d2
	bcs	do_csum_two_words
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1
do_csum_two_words:
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1
do_csum_one_word:
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1

do_csum_fewer_than_4:
	and	+3,d2				# only the 0-3 trailing bytes matter now
	beq	do_csum_done
	xor_cmp	d0,d0,+2,d2			# zero d0 and compare d2 with 2 in one instruction
	bcs	do_csum_fewer_than_2		# exactly one byte left
	movhu	(a0+),d0			# two or three bytes: take a halfword first
	and	+1,d2
	beq	do_csum_add_last_bit		# exactly two bytes
do_csum_fewer_than_2:
	movbu	(a0),d3				# final single byte
	add	d3,d0
do_csum_add_last_bit:
	add	d0,d1
	addc	+0,d1

do_csum_done:
	# compress the checksum down to 16 bits
	mov	+0xffff0000,d0
	and	d1,d0				# d0 = upper half of the sum, kept in bits 31:16
	asl	+16,d1				# d1 = lower half moved up to bits 31:16
	add	d1,d0				# add the halves; a 16-bit overflow sets carry
	addc	+0xffff,d0			# carry becomes +1 in bits 31:16; low half is dropped next
	lsr	+16,d0				# 16-bit result in bits 15:0

	# swap the two bytes of the result if the buffer was oddly aligned
	and	+1,a1
	beq	do_csum_not_oddly_aligned
	swaph	d0,d0				# exchange bits 15:8 with 7:0

do_csum_not_oddly_aligned:
	ret	[d2,d3],8			# restore d2/d3, release their 8 bytes, return

	.size	do_csum, .-do_csum