| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. | 
|  | 3 | * | 
|  | 4 | * This file is subject to the terms and conditions of the GNU General Public | 
|  | 5 | * License.  See the file COPYING in the main directory of this archive | 
|  | 6 | * for more details. No warranty for anything given at all. | 
|  | 7 | */ | 
| Jan Beulich | 8d379da | 2006-09-26 10:52:32 +0200 | [diff] [blame] | 8 | #include <linux/linkage.h> | 
|  | 9 | #include <asm/dwarf2.h> | 
|  | 10 | #include <asm/errno.h> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 11 |  | 
/*
 * Checksum copy with exception handling.
 * On exceptions *src_err_ptr or *dst_err_ptr is set to -EFAULT (a NULL
 * error pointer is tolerated and simply skipped). This routine does not
 * zero the destination itself; that is left to the wrappers, see below.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 * ecx  sum (32bit)
 * r8   src_err_ptr (int)
 * r9   dst_err_ptr (int)
 *
 * Output
 * eax  32bit sum (the 64bit accumulator is folded before returning).
 *      Undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */
|  | 31 |  | 
/*
 * source - mark the instruction that follows as a read from the source
 * buffer.  Records the pair (address of that instruction, .Lbad_source)
 * in the __ex_table section and then returns to the section that was
 * active at the point of use.
 */
.macro source
10:
	.pushsection __ex_table,"a"
	.balign 8
	.quad 10b,.Lbad_source
	.popsection
.endm
|  | 39 |  | 
/*
 * dest - mark the instruction that follows as a write to the destination
 * buffer.  Records the pair (address of that instruction, .Lbad_dest)
 * in the __ex_table section and then returns to the section that was
 * active at the point of use.
 */
.macro dest
20:
	.pushsection __ex_table,"a"
	.balign 8
	.quad 20b,.Lbad_dest
	.popsection
.endm
|  | 47 |  | 
/*
 * ignore - mark the instruction that follows so that its __ex_table entry
 * points at label \L instead of at an error handler.  \L defaults to
 * .Lignore; callers normally pass a local label placed right after the
 * instruction so that execution simply continues there.
 */
.macro ignore L=.Lignore
30:
	.pushsection __ex_table,"a"
	.balign 8
	.quad 30b,\L
	.popsection
.endm
|  | 55 |  | 
|  | 56 |  | 
/*
 * csum_partial_copy_generic - copy len bytes from (%rdi) to (%rsi) while
 * accumulating a ones'-complement style sum of the data into eax.
 *
 * Stack frame (7 quadwords below the return address):
 *   0*8 src_err_ptr   1*8 dst_err_ptr
 *   2*8 rbx   3*8 r12   4*8 r14   5*8 r13   6*8 rbp
 *
 * Every load from the source and every store to the destination is
 * preceded by the source/dest macro, which records an __ex_table entry
 * pointing at .Lbad_source/.Lbad_dest respectively.
 *
 * The summing loops depend on the carry flag surviving from one adc to the
 * next: between adc instructions only dec, mov, lea and jnz are used, none
 * of which modify CF.  Do not reorder or replace them without checking.
 */
ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC

	/*
	 * .Lignore is the default fixup label of the "ignore" macro.  It is
	 * kept so that default still resolves; the only use of the macro in
	 * this function passes an explicit label.
	 */
.Lignore:
	subq	$7*8,%rsp
	CFI_ADJUST_CFA_OFFSET 7*8
	movq	%rbx,2*8(%rsp)
	CFI_REL_OFFSET rbx, 2*8
	movq	%r12,3*8(%rsp)
	CFI_REL_OFFSET r12, 3*8
	movq	%r14,4*8(%rsp)
	CFI_REL_OFFSET r14, 4*8
	movq	%r13,5*8(%rsp)
	CFI_REL_OFFSET r13, 5*8
	movq	%rbp,6*8(%rsp)
	CFI_REL_OFFSET rbp, 6*8

	/* park the error pointers; r8 and r9 are reused below */
	movq	%r8,(%rsp)
	movq	%r9,1*8(%rsp)

	movl	%ecx,%eax		/* rax = initial sum, zero extended */
	movl	%edx,%ecx		/* rcx = length, zero extended */

	xorl	%r9d,%r9d		/* r9 = 0, used to add in the carry */
	movq	%rcx,%r12

	shrq	$6,%r12			/* r12 = number of 64 byte blocks */
	jz	.Lhandle_tail		/* < 64 */

	clc

	/* main loop. copy and checksum in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11:	temp3, rdx: temp4, r12 loopcnt */
	/* r10:	temp5, rbp: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq	(%rdi),%rbx
	source
	movq	8(%rdi),%r8
	source
	movq	16(%rdi),%r11
	source
	movq	24(%rdi),%rdx

	source
	movq	32(%rdi),%r10
	source
	movq	40(%rdi),%rbp
	source
	movq	48(%rdi),%r14
	source
	movq	56(%rdi),%r13

	/* the prefetch is only a hint: its fixup target is the next instruction */
	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	adcq	%rbx,%rax
	adcq	%r8,%rax
	adcq	%r11,%rax
	adcq	%rdx,%rax
	adcq	%r10,%rax
	adcq	%rbp,%rax
	adcq	%r14,%rax
	adcq	%r13,%rax

	decl	%r12d			/* sets ZF for the jnz below, keeps CF */

	dest
	movq	%rbx,(%rsi)
	dest
	movq	%r8,8(%rsi)
	dest
	movq	%r11,16(%rsi)
	dest
	movq	%rdx,24(%rsi)

	dest
	movq	%r10,32(%rsi)
	dest
	movq	%rbp,40(%rsi)
	dest
	movq	%r14,48(%rsi)
	dest
	movq	%r13,56(%rsi)

	leaq	64(%rdi),%rdi		/* lea: advance without touching flags */
	leaq	64(%rsi),%rsi

	jnz	.Lloop

	adcq	%r9,%rax		/* add in carry */

	/* do last up to 56 bytes, 8 bytes at a time */
.Lhandle_tail:
	/* ecx:	count */
	movl	%ecx,%r10d		/* r10d = length, needed again for the sub-8 tail */
	andl	$63,%ecx
	shrl	$3,%ecx			/* ecx = remaining whole quadwords (0..7) */
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq	(%rdi),%rbx
	adcq	%rbx,%rax
	decl	%ecx
	dest
	movq	%rbx,(%rsi)
	leaq	8(%rsi),%rsi		/* preserve carry */
	leaq	8(%rdi),%rdi
	jnz	.Lloop_8
	adcq	%r9,%rax		/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl	%eax,%ebx
	shrq	$32,%rax
	addl	%ebx,%eax
	adcl	%r9d,%eax

	/* do last up to 6 bytes, 2 bytes at a time */
.Lhandle_7:
	movl	%r10d,%ecx
	andl	$7,%ecx
	shrl	$1,%ecx			/* ecx = remaining 16bit words (0..3) */
	jz	.Lhandle_1
	xorl	%ebx,%ebx		/* upper bits of ebx stay zero for the movw loads */
	clc
	.p2align 4
.Lloop_1:
	source
	movw	(%rdi),%bx
	adcl	%ebx,%eax
	decl	%ecx
	dest
	movw	%bx,(%rsi)
	leaq	2(%rdi),%rdi
	leaq	2(%rsi),%rsi
	jnz	.Lloop_1
	adcl	%r9d,%eax		/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testl	$1,%r10d
	jz	.Lende
	xorl	%ebx,%ebx
	source
	movb	(%rdi),%bl
	dest
	movb	%bl,(%rsi)
	addl	%ebx,%eax
	adcl	%r9d,%eax		/* carry */

	CFI_REMEMBER_STATE
.Lende:
	movq	2*8(%rsp),%rbx
	CFI_RESTORE rbx
	movq	3*8(%rsp),%r12
	CFI_RESTORE r12
	movq	4*8(%rsp),%r14
	CFI_RESTORE r14
	movq	5*8(%rsp),%r13
	CFI_RESTORE r13
	movq	6*8(%rsp),%rbp
	CFI_RESTORE rbp
	addq	$7*8,%rsp
	CFI_ADJUST_CFA_OFFSET -7*8
	ret
	CFI_RESTORE_STATE

	/*
	 * Exception handlers. Very simple, zeroing is done in the wrappers.
	 * Each one stores -EFAULT through the matching saved error pointer
	 * unless that pointer is NULL, then leaves through the normal
	 * epilogue.  rax is overwritten here, so the returned sum is not
	 * meaningful on these paths.
	 */
.Lbad_source:
	movq	(%rsp),%rax		/* saved src_err_ptr */
	testq	%rax,%rax
	jz	.Lende
	movl	$-EFAULT,(%rax)
	jmp	.Lende

.Lbad_dest:
	movq	8(%rsp),%rax		/* saved dst_err_ptr */
	testq	%rax,%rax
	jz	.Lende
	movl	$-EFAULT,(%rax)
	jmp	.Lende
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)