| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright (C) 1999-2002 Hewlett-Packard Co | 
|  | 3 | *	Stephane Eranian <eranian@hpl.hp.com> | 
|  | 4 | *	David Mosberger-Tang <davidm@hpl.hp.com> | 
|  | 5 | * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com> | 
|  | 6 | * | 
|  | 7 | * 1/06/01 davidm	Tuned for Itanium. | 
|  | 8 | * 2/12/02 kchen	Tuned for both Itanium and McKinley | 
|  | 9 | * 3/08/02 davidm	Some more tweaking | 
|  | 10 | */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 11 |  | 
|  | 12 | #include <asm/asmmacro.h> | 
|  | 13 | #include <asm/page.h> | 
|  | 14 |  | 
// Cache-line parameters used by clear_page.  Building with CONFIG_ITANIUM
// selects the 64-byte / 9-line pair; any other configuration gets the
// 128-byte / 12-line pair.
//   L3_LINE_SIZE   - byte stride between successive dst_fetch stores, and the
//                    divisor that turns PAGE_SIZE into the main loop count.
//   PREFETCH_LINES - number of iterations of the .fetch loop that runs ahead
//                    of the main loop (ar.lc is loaded with PREFETCH_LINES - 1).
|  | 15 | #ifdef CONFIG_ITANIUM | 
|  | 16 | # define L3_LINE_SIZE	64	// Itanium L3 line size | 
|  | 17 | # define PREFETCH_LINES	9	// magic number | 
|  | 18 | #else | 
|  | 19 | # define L3_LINE_SIZE	128	// McKinley L3 line size | 
|  | 20 | # define PREFETCH_LINES	12	// magic number | 
|  | 21 | #endif | 
|  | 22 |  | 
// Register aliases used by clear_page:
//   saved_lc   (r2)     - copy of ar.lc taken on entry; written back to ar.lc
//                         just before the routine returns.
//   dst_fetch  (r3)     - run-ahead store pointer; starts at in0 and is
//                         post-incremented by L3_LINE_SIZE on every store.
//   dst1..dst4 (r8-r11) - store pointers initialised to in0+16, in0+32,
//                         in0+48 and in0+64 respectively.
//   dst_last   (r31)    - bound that dst_fetch is compared against (cmp.lt)
//                         to decide whether the run-ahead store still fires.
|  | 23 | #define saved_lc	r2 | 
|  | 24 | #define dst_fetch	r3 | 
|  | 25 | #define dst1		r8 | 
|  | 26 | #define dst2		r9 | 
|  | 27 | #define dst3		r10 | 
|  | 28 | #define dst4		r11 | 
|  | 29 |  | 
|  | 30 | #define dst_last	r31 | 
|  | 31 |  | 
// clear_page: fill one page, whose address arrives in in0, with the contents
// of f0 using 16-byte stf.spill stores.  (f0 is the architected constant
// +0.0 register on IA-64, so the stored image is all zero bits.)
//
// In:      in0 = address of the page; it is the only stacked register
//                declared by .regstk.
//                NOTE(review): presumably page-aligned - confirm with callers.
// Writes:  r2, r3, r8-r11, r16, r31, p8 and ar.lc; ar.lc is restored from
//          saved_lc before returning.
//
// Structure:
//   1. .fetch loop: PREFETCH_LINES stores through dst_fetch, one at offset 0
//      of each of the first PREFETCH_LINES lines.
//   2. Main loop (label 1): PAGE_SIZE/L3_LINE_SIZE iterations.  Each one
//      stores the remaining 16-byte slots of a line through dst1..dst4 and,
//      while p8 is set, stores offset 0 of the line PREFETCH_LINES further on
//      through dst_fetch.
|  | 32 | GLOBAL_ENTRY(clear_page) | 
|  | 33 | .prologue | 
|  | 34 | .regstk 1,0,0,0 | 
|  | 35 | mov r16 = PAGE_SIZE/L3_LINE_SIZE-1	// main loop count, -1=repeat/until | 
// Record for the unwinder that ar.lc is kept in saved_lc, then take the copy.
|  | 36 | .save ar.lc, saved_lc | 
|  | 37 | mov saved_lc = ar.lc | 
|  | 38 |  | 
|  | 39 | .body | 
// Loop counter for the .fetch loop: PREFETCH_LINES - 1 gives PREFETCH_LINES
// passes because br.cloop loops until ar.lc reaches zero.
|  | 40 | mov ar.lc = (PREFETCH_LINES - 1) | 
// dst_fetch starts at the page base; dst1/dst2 start at the 2nd and 3rd
// 16-byte slots of the first line.
|  | 41 | mov dst_fetch = in0 | 
|  | 42 | adds dst1 = 16, in0 | 
|  | 43 | adds dst2 = 32, in0 | 
|  | 44 | ;; | 
// Run-ahead loop: store f0 at offset 0 of a line, then step dst_fetch to the
// next line.  The dst3 setup rides along in the loop body; it always computes
// the same value from in0.
|  | 45 | .fetch:	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE | 
|  | 46 | adds dst3 = 48, in0		// executing this multiple times is harmless | 
|  | 47 | br.cloop.sptk.few .fetch | 
|  | 48 | ;; | 
// dst_fetch is now in0 + PREFETCH_LINES*L3_LINE_SIZE, so dst_last works out
// to in0 + PAGE_SIZE, i.e. the first byte past the page.
|  | 49 | addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch | 
|  | 50 | mov ar.lc = r16			// one L3 line per iteration | 
// dst4 is only stored through in the non-CONFIG_ITANIUM loop body below.
|  | 51 | adds dst4 = 64, in0 | 
|  | 52 | ;; | 
|  | 53 | #ifdef CONFIG_ITANIUM | 
|  | 54 | // Optimized for Itanium | 
// 64-byte line: slots +16 and +32 here, slot +48 in the shared tail after
// #endif.  Each pointer post-increments by 64, i.e. one line.
// p8 = (dst_fetch < dst_last): the run-ahead pointer is still inside the page.
|  | 55 | 1:	stf.spill.nta [dst1] = f0, 64 | 
|  | 56 | stf.spill.nta [dst2] = f0, 64 | 
|  | 57 | cmp.lt p8,p0=dst_fetch, dst_last | 
|  | 58 | ;; | 
|  | 59 | #else | 
|  | 60 | // Optimized for McKinley | 
// 128-byte line, first instruction group: slots +16, +32, +48 and +64.
// dst1..dst3 step by 64 so that they land on slots +80, +96 and +112 for the
// second group; dst4 is used once per iteration and so steps a whole line (128).
// p8 = (dst_fetch < dst_last): the run-ahead pointer is still inside the page.
|  | 61 | 1:	stf.spill.nta [dst1] = f0, 64 | 
|  | 62 | stf.spill.nta [dst2] = f0, 64 | 
|  | 63 | stf.spill.nta [dst3] = f0, 64 | 
|  | 64 | stf.spill.nta [dst4] = f0, 128 | 
|  | 65 | cmp.lt p8,p0=dst_fetch, dst_last | 
|  | 66 | ;; | 
// Second instruction group: slots +80 and +96; slot +112 is the shared dst3
// store after #endif.
|  | 67 | stf.spill.nta [dst1] = f0, 64 | 
|  | 68 | stf.spill.nta [dst2] = f0, 64 | 
|  | 69 | #endif | 
// Shared tail for both variants: the last dst3 store of the line, then the
// predicated run-ahead store.  Once dst_fetch has reached dst_last, p8 is
// clear and that store is skipped, so nothing is written past the page.
|  | 70 | stf.spill.nta [dst3] = f0, 64 | 
|  | 71 | (p8)	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE | 
|  | 72 | br.cloop.sptk.few 1b | 
|  | 73 | ;; | 
|  | 74 | mov ar.lc = saved_lc		// restore lc | 
|  | 75 | br.ret.sptk.many rp | 
|  | 76 | END(clear_page) | 