| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
 | 2 |  * Copyright (C) 1999-2002 Hewlett-Packard Co | 
 | 3 |  *	Stephane Eranian <eranian@hpl.hp.com> | 
 | 4 |  *	David Mosberger-Tang <davidm@hpl.hp.com> | 
 | 5 |  * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com> | 
 | 6 |  * | 
 | 7 |  * 1/06/01 davidm	Tuned for Itanium. | 
 | 8 |  * 2/12/02 kchen	Tuned for both Itanium and McKinley | 
 | 9 |  * 3/08/02 davidm	Some more tweaking | 
 | 10 |  */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 11 |  | 
 | 12 | #include <asm/asmmacro.h> | 
 | 13 | #include <asm/page.h> | 
 | 14 |  | 
 // Build-time cache parameters used by clear_page.  CONFIG_ITANIUM selects the
 // Itanium values; any other configuration gets the McKinley values.
 //   L3_LINE_SIZE   - bytes per L3 line; clear_page handles one such line per
 //                    main-loop iteration and steps its run-ahead pointer by it.
 //   PREFETCH_LINES - number of L3 lines the .fetch loop in clear_page touches
 //                    before the main loop starts (a tuned constant, see the
 //                    tuning notes in the file header).
 | 15 | #ifdef CONFIG_ITANIUM | 
 | 16 | # define L3_LINE_SIZE	64	// Itanium L3 line size | 
 | 17 | # define PREFETCH_LINES	9	// magic number | 
 | 18 | #else | 
 | 19 | # define L3_LINE_SIZE	128	// McKinley L3 line size | 
 | 20 | # define PREFETCH_LINES	12	// magic number | 
 | 21 | #endif | 
 | 22 |  | 
 // Register aliases used by clear_page.
 //   saved_lc  - holds the caller's ar.lc while clear_page uses the loop counter
 //   dst_fetch - run-ahead store pointer; starts at in0 and advances one L3 line
 //               per store
 //   dst1-dst4 - main store pointers, initialised to in0+16, in0+32, in0+48 and
 //               in0+64 respectively
 //   dst_last  - upper bound compared against dst_fetch to stop the run-ahead
 //               stores
 | 23 | #define saved_lc	r2 | 
 | 24 | #define dst_fetch	r3 | 
 | 25 | #define dst1		r8 | 
 | 26 | #define dst2		r9 | 
 | 27 | #define dst3		r10 | 
 | 28 | #define dst4		r11 | 
 | 29 |  | 
 | 30 | #define dst_last	r31 | 
 | 31 |  | 
 /*
  * clear_page: fill one page (PAGE_SIZE bytes) starting at in0 with zeros.
  *
  * In:    in0 = address of the page.  NOTE(review): presumably page-aligned;
  *        the loop counts assume whole L3 lines - confirm against callers.
  * Uses:  r2, r3, r8-r11, r16, r31, p8 and ar.lc (ar.lc is saved on entry and
  *        restored before returning).
  *
  * Every store is "stf.spill.nta [ptr] = f0, inc": f0 always reads as zero, one
  * spill writes 16 bytes, and ptr is post-incremented by inc afterwards.
  *
  * Phase 1 (.fetch loop): PREFETCH_LINES stores through dst_fetch, one to the
  * first 16 bytes of each of the first PREFETCH_LINES L3 lines of the page.
  *
  * Phase 2 (loop at 1:): PAGE_SIZE/L3_LINE_SIZE iterations.  Each iteration
  * writes offsets 16 and up of the current L3 line through dst1..dst4 and,
  * while dst_fetch is still below dst_last, the first 16 bytes of the line
  * PREFETCH_LINES ahead through dst_fetch.
  *
  * The ";;" stops delimit instruction groups; the grouping differs between the
  * Itanium and McKinley variants of the loop body.
  */
 | 32 | GLOBAL_ENTRY(clear_page) | 
 | 33 | 	.prologue | 
 // Unwind annotation: the register frame consists of one input register (in0).
 | 34 | 	.regstk 1,0,0,0 | 
 | 35 | 	mov r16 = PAGE_SIZE/L3_LINE_SIZE-1	// main loop count, -1=repeat/until | 
 // ar.lc is reused as the counter for both loops, so the caller's value is
 // parked in saved_lc; .save tells the unwinder where it lives.
 | 36 | 	.save ar.lc, saved_lc | 
 | 37 | 	mov saved_lc = ar.lc | 
 | 38 |  | 
 | 39 | 	.body | 
 // Phase 1 setup: br.cloop loops until ar.lc reaches zero, so a count of
 // PREFETCH_LINES-1 gives PREFETCH_LINES passes through .fetch.
 | 40 | 	mov ar.lc = (PREFETCH_LINES - 1) | 
 | 41 | 	mov dst_fetch = in0 | 
 | 42 | 	adds dst1 = 16, in0 | 
 | 43 | 	adds dst2 = 32, in0 | 
 | 44 | 	;; | 
 // Phase 1: zero the first 16 bytes of each of the first PREFETCH_LINES lines.
 | 45 | .fetch:	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE | 
 | 46 | 	adds dst3 = 48, in0		// executing this multiple times is harmless | 
 | 47 | 	br.cloop.sptk.few .fetch | 
 | 48 | 	;; | 
 // dst_fetch is now in0 + PREFETCH_LINES*L3_LINE_SIZE, so adding the remainder
 // of the page makes dst_last = in0 + PAGE_SIZE (first byte past the page).
 | 49 | 	addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch | 
 | 50 | 	mov ar.lc = r16			// one L3 line per iteration | 
 // dst4 is only stored through in the McKinley variant of the loop below.
 | 51 | 	adds dst4 = 64, in0 | 
 | 52 | 	;; | 
 | 53 | #ifdef CONFIG_ITANIUM | 
 | 54 | 	// Optimized for Itanium | 
 // 64-byte line: these two stores plus the shared dst3 store after the #endif
 // cover offsets 16, 32 and 48 of the current line.
 | 55 | 1:	stf.spill.nta [dst1] = f0, 64 | 
 | 56 | 	stf.spill.nta [dst2] = f0, 64 | 
 // p8 = (dst_fetch < dst_last): the run-ahead pointer is still inside the page.
 | 57 | 	cmp.lt p8,p0=dst_fetch, dst_last | 
 | 58 | 	;; | 
 | 59 | #else | 
 | 60 | 	// Optimized for McKinley | 
 // 128-byte line, first group: offsets 16, 32, 48 and 64 of the current line.
 // dst4 is stored through once per iteration, hence its increment of 128.
 | 61 | 1:	stf.spill.nta [dst1] = f0, 64 | 
 | 62 | 	stf.spill.nta [dst2] = f0, 64 | 
 | 63 | 	stf.spill.nta [dst3] = f0, 64 | 
 | 64 | 	stf.spill.nta [dst4] = f0, 128 | 
 // p8 = (dst_fetch < dst_last): the run-ahead pointer is still inside the page.
 | 65 | 	cmp.lt p8,p0=dst_fetch, dst_last | 
 | 66 | 	;; | 
 // Second group: offsets 80 and 96; the shared dst3 store below covers 112.
 | 67 | 	stf.spill.nta [dst1] = f0, 64 | 
 | 68 | 	stf.spill.nta [dst2] = f0, 64 | 
 | 69 | #endif | 
 // Shared tail of both variants: last dst3 store for this line, then (only
 // while p8 is set) the run-ahead store to the first 16 bytes of a later line.
 | 70 | 	stf.spill.nta [dst3] = f0, 64 | 
 | 71 | (p8)	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE | 
 | 72 | 	br.cloop.sptk.few 1b | 
 | 73 | 	;; | 
 | 74 | 	mov ar.lc = saved_lc		// restore lc | 
 | 75 | 	br.ret.sptk.many rp | 
 | 76 | END(clear_page) |