| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
 | 2 |  * Copyright 2002,2003 Andi Kleen, SuSE Labs. | 
 | 3 |  *	 | 
 | 4 |  * This file is subject to the terms and conditions of the GNU General Public | 
 | 5 |  * License.  See the file COPYING in the main directory of this archive | 
 | 6 |  * for more details. No warranty for anything given at all. | 
 | 7 |  */ | 
| Jan Beulich | 8d379da | 2006-09-26 10:52:32 +0200 | [diff] [blame] | 8 | #include <linux/linkage.h> | 
 | 9 | #include <asm/dwarf2.h> | 
 | 10 | #include <asm/errno.h> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 11 |  | 
 | 12 | /* | 
 | 13 |  * Checksum copy with exception handling. | 
 | 14 |  * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the  | 
 | 15 |  * destination is zeroed. | 
 | 16 |  *  | 
 | 17 |  * Input | 
 | 18 |  * rdi  source | 
 | 19 |  * rsi  destination | 
 | 20 |  * edx  len (32bit) | 
 | 21 |  * ecx  sum (32bit)  | 
 | 22 |  * r8   src_err_ptr (int) | 
 | 23 |  * r9   dst_err_ptr (int) | 
 | 24 |  * | 
 | 25 |  * Output | 
 | 26 |  * eax  32bit sum (the 64bit accumulator folded to 32 bits). undefined in case of exception. | 
 | 27 |  *  | 
 | 28 |  * Wrappers need to take care of valid exception sum and zeroing.		  | 
 | 29 |  * They also should align source or destination to 8 bytes. | 
 | 30 |  */ | 
 | 31 |  | 
	/*
	 * source: place directly in front of an instruction that reads the
	 * source buffer.  Records an exception table entry pairing the address
	 * of that instruction with the .Lbad_source fixup.
	 */
	.macro source
10:
	.pushsection __ex_table,"a"
	.balign 8
	.quad 10b, .Lbad_source		/* (faulting insn, fixup) */
	.popsection
	.endm
 | 39 | 		 | 
	/*
	 * dest: place directly in front of an instruction that writes the
	 * destination buffer.  Records an exception table entry pairing the
	 * address of that instruction with the .Lbad_dest fixup.
	 */
	.macro dest
20:
	.pushsection __ex_table,"a"
	.balign 8
	.quad 20b, .Lbad_dest		/* (faulting insn, fixup) */
	.popsection
	.endm
 | 47 | 			 | 
	/*
	 * ignore: place directly in front of an instruction whose fault should
	 * simply be skipped.  Records an exception table entry pairing the
	 * address of that instruction with the label given in \L (defaults to
	 * .Lignore), where execution resumes.
	 */
	.macro ignore L=.Lignore
30:
	.pushsection __ex_table,"a"
	.balign 8
	.quad 30b, \L			/* (faulting insn, resume label) */
	.popsection
	.endm
 | 55 | 	 | 
 | 56 | 				 | 
/*
 * csum_partial_copy_generic
 *
 * Copies edx bytes from (rdi) to (rsi) while adding them, with end-around
 * carry, into the sum passed in ecx.  The 64bit accumulator is folded to
 * 32 bits and returned in eax.
 *
 * Stack frame (7 quadwords below the return address):
 *   0*8  src_err_ptr (from r8)     1*8  dst_err_ptr (from r9)
 *   2*8  rbx   3*8  r12   4*8  r14   5*8  r13   6*8  rbp
 *
 * Every load from the source is tagged with "source" and every store to
 * the destination with "dest"; a fault on such an instruction continues
 * at .Lbad_source / .Lbad_dest, which store -EFAULT through the matching
 * error pointer (if it is non-NULL) and return through the common
 * epilogue.  eax is not a valid sum in that case.
 *
 * The summing loops keep the carry flag alive across iterations: between
 * one adc and the next only dec, mov, lea and jnz are executed, none of
 * which modifies CF.  Do not insert flag-writing instructions there.
 */
ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC

	/* default resume label of the "ignore" macro */
.Lignore:
	subq  $7*8,%rsp
	CFI_ADJUST_CFA_OFFSET 7*8
	movq  %rbx,2*8(%rsp)
	CFI_REL_OFFSET rbx, 2*8
	movq  %r12,3*8(%rsp)
	CFI_REL_OFFSET r12, 3*8
	movq  %r14,4*8(%rsp)
	CFI_REL_OFFSET r14, 4*8
	movq  %r13,5*8(%rsp)
	CFI_REL_OFFSET r13, 5*8
	movq  %rbp,6*8(%rsp)
	CFI_REL_OFFSET rbp, 6*8

	/* r8 and r9 are reused below; keep the error pointers for the fixups */
	movq  %r8,(%rsp)		/* src_err_ptr */
	movq  %r9,1*8(%rsp)		/* dst_err_ptr */

	movl  %ecx,%eax			/* rax = initial sum, zero extended */
	movl  %edx,%ecx			/* rcx = len; rdx is a temporary from here on */

	xorl  %r9d,%r9d			/* r9 = 0, used to add in a pending carry */
	movq  %rcx,%r12

	shrq  $6,%r12			/* r12 = number of 64 byte blocks */
	jz    .Lhandle_tail       /* < 64 */

	clc

	/* main loop. copy and sum in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11:	temp3, rdx: temp4, r12 loopcnt */
	/* r10:	temp5, rbp: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq  (%rdi),%rbx
	source
	movq  8(%rdi),%r8
	source
	movq  16(%rdi),%r11
	source
	movq  24(%rdi),%rdx

	source
	movq  32(%rdi),%r10
	source
	movq  40(%rdi),%rbp
	source
	movq  48(%rdi),%r14
	source
	movq  56(%rdi),%r13

	/* a fault on the prefetch is skipped: execution resumes at 2: */
	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	/* the first adc consumes the carry left by the previous iteration */
	adcq  %rbx,%rax
	adcq  %r8,%rax
	adcq  %r11,%rax
	adcq  %rdx,%rax
	adcq  %r10,%rax
	adcq  %rbp,%rax
	adcq  %r14,%rax
	adcq  %r13,%rax

	decl %r12d			/* sets ZF for the jnz below, leaves CF alone */

	dest
	movq %rbx,(%rsi)
	dest
	movq %r8,8(%rsi)
	dest
	movq %r11,16(%rsi)
	dest
	movq %rdx,24(%rsi)

	dest
	movq %r10,32(%rsi)
	dest
	movq %rbp,40(%rsi)
	dest
	movq %r14,48(%rsi)
	dest
	movq %r13,56(%rsi)

	leaq 64(%rdi),%rdi		/* lea: advance without touching flags */
	leaq 64(%rsi),%rsi

	jnz   .Lloop

	adcq  %r9,%rax			/* add in the final carry */

	/* do last up to 56 bytes, one quadword at a time */
.Lhandle_tail:
	/* ecx:	count */
	movl %ecx,%r10d			/* r10d = full length, needed for the sub-8 tail */
	andl $63,%ecx
	shrl $3,%ecx			/* ecx = remaining whole quadwords (0..7) */
	jz 	 .Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi),%rbx
	adcq %rbx,%rax
	decl %ecx
	dest
	movq %rbx,(%rsi)
	leaq 8(%rsi),%rsi /* preserve carry */
	leaq 8(%rdi),%rdi
	jnz	.Lloop_8
	adcq %r9,%rax	/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl %eax,%ebx			/* ebx = low half */
	shrq $32,%rax			/* eax = high half */
	addl %ebx,%eax
	adcl %r9d,%eax			/* end-around carry */

	/* do last up to 6 bytes, one 16bit word at a time */
.Lhandle_7:
	movl %r10d,%ecx
	andl $7,%ecx
	shrl $1,%ecx			/* ecx = remaining whole words (0..3) */
	jz   .Lhandle_1
	xorl %ebx,%ebx			/* upper bits of ebx stay zero for the movw loads */
	clc
	.p2align 4
.Lloop_1:
	source
	movw (%rdi),%bx
	adcl %ebx,%eax
	decl %ecx
	dest
	movw %bx,(%rsi)
	leaq 2(%rdi),%rdi
	leaq 2(%rsi),%rsi
	jnz .Lloop_1
	adcl %r9d,%eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testl $1,%r10d
	jz    .Lende
	xorl  %ebx,%ebx
	source
	movb (%rdi),%bl
	dest
	movb %bl,(%rsi)
	addl %ebx,%eax
	adcl %r9d,%eax		/* carry */

	CFI_REMEMBER_STATE
	/* common epilogue, also the exit of both exception handlers */
.Lende:
	movq 2*8(%rsp),%rbx
	CFI_RESTORE rbx
	movq 3*8(%rsp),%r12
	CFI_RESTORE r12
	movq 4*8(%rsp),%r14
	CFI_RESTORE r14
	movq 5*8(%rsp),%r13
	CFI_RESTORE r13
	movq 6*8(%rsp),%rbp
	CFI_RESTORE rbp
	addq $7*8,%rsp
	CFI_ADJUST_CFA_OFFSET -7*8
	ret
	CFI_RESTORE_STATE

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
	movq (%rsp),%rax		/* rax = src_err_ptr */
	testq %rax,%rax
	jz   .Lende			/* NULL: nothing to report into */
	movl $-EFAULT,(%rax)
	jmp  .Lende

.Lbad_dest:
	movq 8(%rsp),%rax		/* rax = dst_err_ptr */
	testq %rax,%rax
	jz   .Lende			/* NULL: nothing to report into */
	movl $-EFAULT,(%rax)
	jmp .Lende
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)