| /* | 
 |  * Copyright 2002, 2003 Andi Kleen, SuSE Labs. | 
 |  * | 
 |  * This file is subject to the terms and conditions of the GNU General Public | 
 |  * License.  See the file COPYING in the main directory of this archive | 
 |  * for more details. No warranty for anything given at all. | 
 |  */ | 
 | #include <linux/linkage.h> | 
 | #include <asm/dwarf2.h> | 
 | #include <asm/errno.h> | 
 |  | 
 | /* | 
 |  * Checksum copy with exception handling. | 
 * On an exception, *src_err_ptr or *dst_err_ptr is set to -EFAULT and
 * the destination is zeroed.
 |  * | 
 |  * Input | 
 |  * rdi  source | 
 |  * rsi  destination | 
 |  * edx  len (32bit) | 
 |  * ecx  sum (32bit) | 
 |  * r8   src_err_ptr (int) | 
 |  * r9   dst_err_ptr (int) | 
 |  * | 
 |  * Output | 
 * eax  32bit folded sum. Undefined in case of exception.
 |  * | 
 * Wrappers need to take care of returning a valid sum on an exception
 * and of zeroing the destination. They should also align source or
 * destination to 8 bytes.
 |  */ | 
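
/*
 * For reference, a C-level sketch of the contract this entry point
 * implements (prototype roughly as declared in the x86-64 checksum
 * headers of this era; exact parameter types may differ):
 *
 *	unsigned csum_partial_copy_generic(const char *src, char *dst,
 *					   unsigned len, unsigned sum,
 *					   int *src_err_ptr,
 *					   int *dst_err_ptr);
 */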
 |  | 
 | 	.macro source | 
 | 10: | 
 | 	.section __ex_table, "a" | 
 | 	.align 8 | 
 | 	.quad 10b, .Lbad_source | 
 | 	.previous | 
 | 	.endm | 
 |  | 
 | 	.macro dest | 
 | 20: | 
 | 	.section __ex_table, "a" | 
 | 	.align 8 | 
 | 	.quad 20b, .Lbad_dest | 
 | 	.previous | 
 | 	.endm | 
 |  | 
 | 	.macro ignore L=.Lignore | 
 | 30: | 
 | 	.section __ex_table, "a" | 
 | 	.align 8 | 
 | 	.quad 30b, \L | 
 | 	.previous | 
 | 	.endm | 
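
/*
 * The macros above emit __ex_table entries: (faulting instruction,
 * fixup) address pairs. On a fault the page fault handler searches
 * this table and, when the faulting RIP matches an entry, resumes
 * execution at the fixup instead of oopsing. Roughly, in C (a sketch
 * of the mechanism, not the kernel's exact extable code, which does
 * a sorted binary search):
 *
 *	struct exception_table_entry { unsigned long insn, fixup; };
 *
 *	// in the fault path:
 *	for (e = __start___ex_table; e < __stop___ex_table; e++)
 *		if (e->insn == regs->rip) {
 *			regs->rip = e->fixup;	// retry at the fixup
 *			return;
 *		}
 */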
 |  | 
 |  | 
 | ENTRY(csum_partial_copy_generic) | 
 | 	CFI_STARTPROC | 
	cmpl	$3*64, %edx
	jle	.Lignore
	/*
	 * Note that .Lignore follows immediately, so the branch above
	 * is currently a no-op; it presumably once skipped
	 * prefetch-priming code that has since been removed.
	 */
.Lignore:
 | 	subq  $7*8, %rsp | 
 | 	CFI_ADJUST_CFA_OFFSET 7*8 | 
 | 	movq  %rbx, 2*8(%rsp) | 
 | 	CFI_REL_OFFSET rbx, 2*8 | 
 | 	movq  %r12, 3*8(%rsp) | 
 | 	CFI_REL_OFFSET r12, 3*8 | 
 | 	movq  %r14, 4*8(%rsp) | 
 | 	CFI_REL_OFFSET r14, 4*8 | 
 | 	movq  %r13, 5*8(%rsp) | 
 | 	CFI_REL_OFFSET r13, 5*8 | 
 | 	movq  %rbp, 6*8(%rsp) | 
 | 	CFI_REL_OFFSET rbp, 6*8 | 
 |  | 
 | 	movq  %r8, (%rsp) | 
 | 	movq  %r9, 1*8(%rsp) | 
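
	/*
	 * Stack frame after the spills above (7 quadword slots):
	 *   0*8(%rsp) src_err_ptr    1*8(%rsp) dst_err_ptr
	 *   2*8 rbx   3*8 r12   4*8 r14   5*8 r13   6*8 rbp
	 */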
 |  | 
	movl  %ecx, %eax	/* running sum accumulates in rax */
	movl  %edx, %ecx	/* length lives in rcx from here on */

	xorl  %r9d, %r9d	/* r9 = 0, used to fold carries in via adc */
	movq  %rcx, %r12

	shrq  $6, %r12		/* r12 = number of 64-byte blocks */
	jz	.Lhandle_tail       /* < 64 */
 |  | 
 | 	clc | 
 |  | 
	/* main loop: checksum and copy in 64-byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12: loopcnt */
	/* r10: temp5, rbp: temp6, r14: temp7, r13: temp8 */
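	/*
	 * The adcq chain below keeps the carry flag live across the
	 * whole iteration: mov and lea do not modify flags, and decl
	 * updates ZF (tested by jnz) but leaves CF alone. The final
	 * "adcq %r9, %rax" after the loop folds the last pending carry
	 * into the sum (r9 is zero). One add-with-carry step in C,
	 * using a 128-bit intermediate for clarity (sketch only):
	 *
	 *	unsigned __int128 t = (unsigned __int128)sum + q[i] + cf;
	 *	sum = (unsigned long)t;
	 *	cf  = (unsigned long)(t >> 64);	// carry into next adcq
	 */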
 | 	.p2align 4 | 
 | .Lloop: | 
 | 	source | 
 | 	movq  (%rdi), %rbx | 
 | 	source | 
 | 	movq  8(%rdi), %r8 | 
 | 	source | 
 | 	movq  16(%rdi), %r11 | 
 | 	source | 
 | 	movq  24(%rdi), %rdx | 
 |  | 
 | 	source | 
 | 	movq  32(%rdi), %r10 | 
 | 	source | 
 | 	movq  40(%rdi), %rbp | 
 | 	source | 
 | 	movq  48(%rdi), %r14 | 
 | 	source | 
 | 	movq  56(%rdi), %r13 | 
 |  | 
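	/*
	 * The prefetch gets its own fixup (2f, i.e. simply skip it):
	 * prefetches are not supposed to fault, but some early CPUs
	 * could raise spurious faults on them, so it must be safe to
	 * ignore here.
	 */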
 | 	ignore 2f | 
 | 	prefetcht0 5*64(%rdi) | 
 | 2: | 
 | 	adcq  %rbx, %rax | 
 | 	adcq  %r8, %rax | 
 | 	adcq  %r11, %rax | 
 | 	adcq  %rdx, %rax | 
 | 	adcq  %r10, %rax | 
 | 	adcq  %rbp, %rax | 
 | 	adcq  %r14, %rax | 
 | 	adcq  %r13, %rax | 
 |  | 
 | 	decl %r12d | 
 |  | 
 | 	dest | 
 | 	movq %rbx, (%rsi) | 
 | 	dest | 
 | 	movq %r8, 8(%rsi) | 
 | 	dest | 
 | 	movq %r11, 16(%rsi) | 
 | 	dest | 
 | 	movq %rdx, 24(%rsi) | 
 |  | 
 | 	dest | 
 | 	movq %r10, 32(%rsi) | 
 | 	dest | 
 | 	movq %rbp, 40(%rsi) | 
 | 	dest | 
 | 	movq %r14, 48(%rsi) | 
 | 	dest | 
 | 	movq %r13, 56(%rsi) | 
 |  | 
3:	/* currently unreferenced within this file */
 |  | 
 | 	leaq 64(%rdi), %rdi | 
 | 	leaq 64(%rsi), %rsi | 
 |  | 
 | 	jnz	.Lloop | 
 |  | 
 | 	adcq  %r9, %rax | 
 |  | 
 | 	/* do last up to 56 bytes */ | 
 | .Lhandle_tail: | 
 | 	/* ecx:	count */ | 
 | 	movl %ecx, %r10d | 
 | 	andl $63, %ecx | 
 | 	shrl $3, %ecx | 
 | 	jz	.Lfold | 
 | 	clc | 
 | 	.p2align 4 | 
 | .Lloop_8: | 
 | 	source | 
 | 	movq (%rdi), %rbx | 
 | 	adcq %rbx, %rax | 
 | 	decl %ecx | 
 | 	dest | 
 | 	movq %rbx, (%rsi) | 
 | 	leaq 8(%rsi), %rsi /* preserve carry */ | 
 | 	leaq 8(%rdi), %rdi | 
 | 	jnz	.Lloop_8 | 
 | 	adcq %r9, %rax	/* add in carry */ | 
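
	/*
	 * Same carry-chain idea as the main loop, one quadword at a
	 * time: lea is used for the pointer updates because it leaves
	 * both CF (from adcq) and ZF (from decl) untouched.
	 */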
 |  | 
 | .Lfold: | 
 | 	/* reduce checksum to 32bits */ | 
 | 	movl %eax, %ebx | 
 | 	shrq $32, %rax | 
 | 	addl %ebx, %eax | 
 | 	adcl %r9d, %eax | 
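
	/*
	 * The fold above in C (sketch): add the high and low 32-bit
	 * halves, then add the resulting carry back in:
	 *
	 *	unsigned lo = (unsigned)sum, hi = (unsigned)(sum >> 32);
	 *	unsigned s  = lo + hi;		// addl, may produce CF
	 *	s += (s < hi);			// adcl %r9d folds CF back in
	 */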
 |  | 
 | 	/* do last up to 6 bytes */ | 
 | .Lhandle_7: | 
 | 	movl %r10d, %ecx | 
 | 	andl $7, %ecx | 
 | 	shrl $1, %ecx | 
 | 	jz   .Lhandle_1 | 
	movl $2, %edx		/* apparently unused below; likely a leftover */
 | 	xorl %ebx, %ebx | 
 | 	clc | 
 | 	.p2align 4 | 
 | .Lloop_1: | 
 | 	source | 
 | 	movw (%rdi), %bx | 
 | 	adcl %ebx, %eax | 
 | 	decl %ecx | 
 | 	dest | 
 | 	movw %bx, (%rsi) | 
 | 	leaq 2(%rdi), %rdi | 
 | 	leaq 2(%rsi), %rsi | 
 | 	jnz .Lloop_1 | 
 | 	adcl %r9d, %eax	/* add in carry */ | 
 |  | 
 | 	/* handle last odd byte */ | 
 | .Lhandle_1: | 
 | 	testl $1, %r10d | 
 | 	jz    .Lende | 
 | 	xorl  %ebx, %ebx | 
 | 	source | 
 | 	movb (%rdi), %bl | 
 | 	dest | 
 | 	movb %bl, (%rsi) | 
 | 	addl %ebx, %eax | 
 | 	adcl %r9d, %eax		/* carry */ | 
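
	/*
	 * In both tails %ebx is zeroed up front and only its low 16 or
	 * 8 bits are overwritten, so each add sees the datum
	 * zero-extended; the trailing adc with %r9d (always zero)
	 * folds the final carry back into the sum.
	 */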
 |  | 
 | 	CFI_REMEMBER_STATE | 
 | .Lende: | 
 | 	movq 2*8(%rsp), %rbx | 
 | 	CFI_RESTORE rbx | 
 | 	movq 3*8(%rsp), %r12 | 
 | 	CFI_RESTORE r12 | 
 | 	movq 4*8(%rsp), %r14 | 
 | 	CFI_RESTORE r14 | 
 | 	movq 5*8(%rsp), %r13 | 
 | 	CFI_RESTORE r13 | 
 | 	movq 6*8(%rsp), %rbp | 
 | 	CFI_RESTORE rbp | 
 | 	addq $7*8, %rsp | 
 | 	CFI_ADJUST_CFA_OFFSET -7*8 | 
 | 	ret | 
 | 	CFI_RESTORE_STATE | 
 |  | 
	/*
	 * Exception handlers. Very simple; zeroing is done in the
	 * wrappers. The error pointers were spilled to the stack at
	 * function entry: src_err_ptr at (%rsp), dst_err_ptr at
	 * 1*8(%rsp).
	 */
 | .Lbad_source: | 
	movq (%rsp), %rax	/* src_err_ptr */
 | 	testq %rax, %rax | 
 | 	jz   .Lende | 
 | 	movl $-EFAULT, (%rax) | 
 | 	jmp  .Lende | 
 |  | 
 | .Lbad_dest: | 
	movq 1*8(%rsp), %rax	/* dst_err_ptr */
 | 	testq %rax, %rax | 
 | 	jz   .Lende | 
 | 	movl $-EFAULT, (%rax) | 
 | 	jmp .Lende | 
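
	/*
	 * In C each handler amounts to (sketch):
	 *
	 *	if (err_ptr)		// reloaded from its stack slot
	 *		*err_ptr = -EFAULT;
	 *	// then fall into the normal epilogue; the sum in eax
	 *	// is undefined and the wrapper cleans up
	 */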
 | 	CFI_ENDPROC | 
 | ENDPROC(csum_partial_copy_generic) |