/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 *	rdi destination
 *	rsi source
 *	rdx count
 *
 * Output:
 *	rax original destination
 */
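
/*
 * For reference, the C-level contract is the usual one (a sketch; the
 * kernel's own declaration lives in its string headers):
 *
 *	void *memcpy(void *dest, const void *src, size_t count);
 */

/*
 * memcpy_c - variant for CPUs with fast string instructions
 * (X86_FEATURE_REP_GOOD): copies count/8 qwords with rep movsq, then
 * the remaining count%8 bytes with rep movsb.  The alternatives
 * records at the end of this file patch it in over memcpy.
 */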
	ALIGN
memcpy_c:
	CFI_STARTPROC
	movq %rdi,%rax		/* return value: original destination */
	movl %edx,%ecx		/* note: only low 32 bits of count used */
	shrl $3,%ecx		/* whole qwords */
	andl $7,%edx		/* trailing bytes */
	rep movsq
	movl %edx,%ecx
	rep movsb
	ret
	CFI_ENDPROC
ENDPROC(memcpy_c)
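
/*
 * Generic version: copy 64 bytes per iteration of an unrolled loop,
 * then mop up the remaining qwords and bytes in two tail loops.
 */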
ENTRY(__memcpy)
ENTRY(memcpy)
	CFI_STARTPROC
	pushq %rbx		/* saved here, but not otherwise used below */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbx, 0
	movq %rdi,%rax		/* return value: original destination */

	movl %edx,%ecx		/* note: only low 32 bits of count used */
	shrl $6,%ecx		/* number of 64-byte blocks */
	jz .Lhandle_tail
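
/*
 * Unrolled 64-byte copy loop: loads and stores are interleaved in
 * qword pairs.
 */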
	.p2align 4
.Lloop_64:
	decl %ecx		/* mov/lea below leave the flags untouched */

	movq (%rsi),%r11
	movq 1*8(%rsi),%r8

	movq %r11,(%rdi)
	movq %r8,1*8(%rdi)

	movq 2*8(%rsi),%r9
	movq 3*8(%rsi),%r10

	movq %r9,2*8(%rdi)
	movq %r10,3*8(%rdi)

	movq 4*8(%rsi),%r11
	movq 5*8(%rsi),%r8

	movq %r11,4*8(%rdi)
	movq %r8,5*8(%rdi)

	movq 6*8(%rsi),%r9
	movq 7*8(%rsi),%r10

	movq %r9,6*8(%rdi)
	movq %r10,7*8(%rdi)

	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	jnz .Lloop_64
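
/*
 * Tail: at most 63 bytes remain; %edx still holds the low bits of the
 * original count.
 */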
.Lhandle_tail:
	movl %edx,%ecx
	andl $63,%ecx
	shrl $3,%ecx		/* remaining whole qwords */
	jz .Lhandle_7
	.p2align 4
.Lloop_8:
	decl %ecx
	movq (%rsi),%r8
	movq %r8,(%rdi)
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8
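
/*
 * Fewer than eight bytes remain; copy them one at a time.
 */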
.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx		/* remaining bytes */
	jz .Lende
	.p2align 4
.Lloop_1:
	movb (%rsi),%r8b
	movb %r8b,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

.Lende:
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx
	ret
.Lfinal:
	CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)

	/*
	 * Some CPUs run faster using the string copy instructions
	 * (memcpy_c above); that variant is also a lot simpler.
	 * Use it when possible.
	 */

	.section .altinstr_replacement,"ax"
1:	.byte 0xeb				/* jmp <disp8> */
	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
2:
	.previous
	.section .altinstructions,"a"
	.align 8
	.quad memcpy				/* original instruction */
	.quad 1b				/* replacement */
	.byte X86_FEATURE_REP_GOOD
	/*
	 * Replace only the beginning: memcpy itself is used while
	 * alternatives are applied, so overwriting itself with nops
	 * would be silly - a reboot would be the only outcome.
	 */
	.byte 2b - 1b				/* length of code to patch */
	.byte 2b - 1b				/* length of replacement */
	.previous
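
/*
 * Layout note (a sketch, not authoritative): each record in
 * .altinstructions above mirrors this era's struct alt_instr, roughly:
 *
 *	struct alt_instr {
 *		u8 *instr;		// original insn:     .quad memcpy
 *		u8 *replacement;	// replacement code:  .quad 1b
 *		u8  cpuid;		// required feature bit
 *		u8  instrlen;		// bytes to patch at the original
 *		u8  replacementlen;	// bytes of replacement code
 *	};
 *
 * At boot, apply_alternatives() rewrites the first two bytes of memcpy
 * with the short jmp to memcpy_c when the CPU has X86_FEATURE_REP_GOOD.
 */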