Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!
diff --git a/arch/x86_64/lib/csum-copy.S b/arch/x86_64/lib/csum-copy.S
new file mode 100644
index 0000000..01808ec
--- /dev/null
+++ b/arch/x86_64/lib/csum-copy.S
@@ -0,0 +1,233 @@
+/*
+ * Copyright 2002,2003 Andi Kleen, SuSE Labs.
+ *	
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details. No warranty for anything given at all.
+ */
+ 	#include <linux/linkage.h>
+	#include <asm/errno.h>
+
+/*
+ * Checksum copy with exception handling.
+ * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the 
+ * destination is zeroed.
+ * 
+ * Input
+ * rdi  source
+ * rsi  destination
+ * edx  len (32bit)
+ * ecx  sum (32bit) 
+ * r8   src_err_ptr (int)
+ * r9   dst_err_ptr (int)
+ *
+ * Output
+ * eax  64bit sum. undefined in case of exception.
+ * 
+ * Wrappers need to take care of valid exception sum and zeroing.		 
+ * They also should align source or destination to 8 bytes.
+ */
+
+	.macro source
+10:
+	.section __ex_table,"a"
+	.align 8
+	.quad 10b,.Lbad_source
+	.previous
+	.endm
+		
+	.macro dest
+20:
+	.section __ex_table,"a"
+	.align 8
+	.quad 20b,.Lbad_dest
+	.previous
+	.endm
+			
+	.macro ignore L=.Lignore
+30:
+	.section __ex_table,"a"
+	.align 8
+	.quad 30b,\L
+	.previous
+	.endm
+	
+				
+	.globl csum_partial_copy_generic
+	.p2align 4
+csum_partial_copy_generic:
+	cmpl	 $3*64,%edx
+	jle	 .Lignore
+
+.Lignore:		
+	subq  $7*8,%rsp
+	movq  %rbx,2*8(%rsp)
+	movq  %r12,3*8(%rsp)
+	movq  %r14,4*8(%rsp)
+	movq  %r13,5*8(%rsp)
+	movq  %rbp,6*8(%rsp)
+
+	movq  %r8,(%rsp)
+	movq  %r9,1*8(%rsp)
+	
+	movl  %ecx,%eax
+	movl  %edx,%ecx
+
+	xorl  %r9d,%r9d
+	movq  %rcx,%r12
+
+	shrq  $6,%r12
+	jz    .Lhandle_tail       /* < 64 */
+
+	clc
+	
+	/* main loop. clear in 64 byte blocks */
+	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
+	/* r11:	temp3, rdx: temp4, r12 loopcnt */
+	/* r10:	temp5, rbp: temp6, r14 temp7, r13 temp8 */
+	.p2align 4
+.Lloop:
+	source
+	movq  (%rdi),%rbx
+	source
+	movq  8(%rdi),%r8
+	source
+	movq  16(%rdi),%r11
+	source
+	movq  24(%rdi),%rdx
+
+	source
+	movq  32(%rdi),%r10
+	source
+	movq  40(%rdi),%rbp
+	source
+	movq  48(%rdi),%r14
+	source
+	movq  56(%rdi),%r13
+		
+	ignore 2f
+	prefetcht0 5*64(%rdi)
+2:							
+	adcq  %rbx,%rax
+	adcq  %r8,%rax
+	adcq  %r11,%rax
+	adcq  %rdx,%rax
+	adcq  %r10,%rax
+	adcq  %rbp,%rax
+	adcq  %r14,%rax
+	adcq  %r13,%rax
+
+	decl %r12d
+	
+	dest
+	movq %rbx,(%rsi)
+	dest
+	movq %r8,8(%rsi)
+	dest
+	movq %r11,16(%rsi)
+	dest
+	movq %rdx,24(%rsi)
+
+	dest
+	movq %r10,32(%rsi)
+	dest
+	movq %rbp,40(%rsi)
+	dest
+	movq %r14,48(%rsi)
+	dest
+	movq %r13,56(%rsi)
+	
+3:
+	
+	leaq 64(%rdi),%rdi
+	leaq 64(%rsi),%rsi
+
+	jnz   .Lloop
+
+	adcq  %r9,%rax
+
+	/* do last upto 56 bytes */
+.Lhandle_tail:
+	/* ecx:	count */
+	movl %ecx,%r10d
+	andl $63,%ecx
+	shrl $3,%ecx
+	jz 	 .Lfold
+	clc
+	.p2align 4
+.Lloop_8:	
+	source
+	movq (%rdi),%rbx
+	adcq %rbx,%rax
+	decl %ecx
+	dest
+	movq %rbx,(%rsi)
+	leaq 8(%rsi),%rsi /* preserve carry */
+	leaq 8(%rdi),%rdi
+	jnz	.Lloop_8
+	adcq %r9,%rax	/* add in carry */
+
+.Lfold:
+	/* reduce checksum to 32bits */
+	movl %eax,%ebx
+	shrq $32,%rax
+	addl %ebx,%eax
+	adcl %r9d,%eax
+
+	/* do last upto 6 bytes */	
+.Lhandle_7:
+	movl %r10d,%ecx
+	andl $7,%ecx
+	shrl $1,%ecx
+	jz   .Lhandle_1
+	movl $2,%edx
+	xorl %ebx,%ebx
+	clc  
+	.p2align 4
+.Lloop_1:	
+	source
+	movw (%rdi),%bx
+	adcl %ebx,%eax
+	dest
+	decl %ecx
+	movw %bx,(%rsi)
+	leaq 2(%rdi),%rdi
+	leaq 2(%rsi),%rsi
+	jnz .Lloop_1
+	adcl %r9d,%eax	/* add in carry */
+	
+	/* handle last odd byte */
+.Lhandle_1:
+	testl $1,%r10d
+	jz    .Lende
+	xorl  %ebx,%ebx
+	source
+	movb (%rdi),%bl
+	dest
+	movb %bl,(%rsi)
+	addl %ebx,%eax
+	adcl %r9d,%eax		/* carry */
+			
+.Lende:
+	movq 2*8(%rsp),%rbx
+	movq 3*8(%rsp),%r12
+	movq 4*8(%rsp),%r14
+	movq 5*8(%rsp),%r13
+	movq 6*8(%rsp),%rbp
+	addq $7*8,%rsp
+	ret
+
+	/* Exception handlers. Very simple, zeroing is done in the wrappers */
+.Lbad_source:
+	movq (%rsp),%rax
+	testq %rax,%rax
+	jz   .Lende
+	movl $-EFAULT,(%rax)
+	jmp  .Lende
+	
+.Lbad_dest:
+	movq 8(%rsp),%rax
+	testq %rax,%rax
+	jz   .Lende	
+	movl $-EFAULT,(%rax)
+	jmp .Lende