/* $Id: dtlb_backend.S,v 1.16 2001/10/09 04:02:11 davem Exp $
 * dtlb_backend.S: Back end to DTLB miss replacement strategy.
 *                 This is included directly into the trap table.
 *
 * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com)
 * Copyright (C) 1997,1998 Jakub Jelinek   (jj@ultra.linux.cz)
 */

#include <asm/pgtable.h>
#include <asm/mmu.h>

#if PAGE_SHIFT == 13
#define SZ_BITS		_PAGE_SZ8K
#elif PAGE_SHIFT == 16
#define SZ_BITS		_PAGE_SZ64K
#elif PAGE_SHIFT == 19
#define SZ_BITS		_PAGE_SZ512K
#elif PAGE_SHIFT == 22
#define SZ_BITS		_PAGE_SZ4MB
#endif

#define VALID_SZ_BITS	(_PAGE_VALID | SZ_BITS)

#define VPTE_BITS		(_PAGE_CP | _PAGE_CV | _PAGE_P)
#define VPTE_SHIFT		(PAGE_SHIFT - 3)
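
/* Since each PTE is 8 bytes, the linear page table slot for a given
 * virtual address sits at vpte_base + ((vaddr >> PAGE_SHIFT) << 3),
 * so the page number contributes its bits shifted down by a net
 * VPTE_SHIFT = PAGE_SHIFT - 3.  (Illustrative note; "vpte_base"
 * stands in here for the base of the linear page table mapping.)
 */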

/* Ways we can get here:
 *
 * 1) Nucleus loads and stores to/from PA-->VA direct mappings at tl>1.
 * 2) Nucleus loads and stores to/from user/kernel window save areas.
 * 3) VPTE misses from dtlb_base and itlb_base.
 *
 * We need to extract the PMD and PGDIR indexes from the linear
 * virtual page table access address.  The PTE index is at the
 * bottom, but we are not concerned with it.  Bits 0 to 2 are
 * clear since each PTE is 8 bytes in size.  Each PMD and PGDIR
 * entry is 4 bytes in size.  Thus, this address looks something
 * like:
 *
 * |---------------------------------------------------------------|
 * |  ...   |    PGDIR index    |    PMD index    | PTE index  |   |
 * |---------------------------------------------------------------|
 *   63   F   E               D   C             B   A         3 2 0  <- bit nr
 *
 *  The variable bits above are defined as:
 *  A --> 3 + (PAGE_SHIFT - log2(8)) - 1
 *    --> 3 + (PAGE_SHIFT - 3) - 1
 *        (ie. this is "bit 3" + PAGE_SHIFT - log2 of the PTE entry size - 1)
 *  B --> A + 1
 *  C --> B + (PAGE_SHIFT - log2(4)) - 1
 *    --> B + (PAGE_SHIFT - 2) - 1
 *        (ie. this is "bit B" + PAGE_SHIFT - log2 of the PMD entry size - 1)
 *  D --> C + 1
 *  E --> D + (PAGE_SHIFT - log2(4)) - 1
 *    --> D + (PAGE_SHIFT - 2) - 1
 *        (ie. this is "bit D" + PAGE_SHIFT - log2 of the PGDIR entry size - 1)
 *  F --> E + 1
 *
 * (Note how "B" always evaluates to PAGE_SHIFT: A is
 *  3 + (PAGE_SHIFT - 3) - 1 = PAGE_SHIFT - 1, so B = A + 1 = PAGE_SHIFT;
 *  all the other constants cancel out.)
 *
 * For 8K PAGE_SIZE (thus, PAGE_SHIFT of 13) the bit numbers are:
 * A --> 12
 * B --> 13
 * C --> 23
 * D --> 24
 * E --> 34
 * F --> 35
 *
 * For 64K PAGE_SIZE (thus, PAGE_SHIFT of 16) the bit numbers are:
 * A --> 15
 * B --> 16
 * C --> 29
 * D --> 30
 * E --> 43
 * F --> 44
 *
 * Because bits both above and below each PGDIR and PMD index need to
 * be masked out, and the index can be as long as 14 bits (when using a
 * 64K PAGE_SIZE, and thus a PAGE_SHIFT of 16), we need 3 instructions
 * to extract each index.
 *
 * Shifts do not pair very well on UltraSPARC-I, II, IIi, and IIe, so
 * we try to avoid using them for the entire operation.  We could set
 * up a mask anywhere from bit 31 down to bit 10 using the sethi
 * instruction.
 *
 * We need a mask covering bits B --> C and one covering D --> E.
 * For 8K PAGE_SIZE these masks are 0x00ffe000 and 0x7ff000000.
 * For 64K PAGE_SIZE these masks are 0x3fff0000 and 0xfffc0000000.
 * The second in each set cannot be loaded with a single sethi
 * instruction, because its upper bits are past bit 31.  We would
 * need to use a sethi + a shift.
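 *
 * (For instance, "sethi %hi(0x00ffe000), %gN" would build the first
 *  8K mask in one go, since sethi fills bits 31:10 of its target
 *  from a 22-bit immediate; 0x7ff000000 has set bits above bit 31
 *  and so needs the sethi + shift pair noted above.  Illustrative
 *  example, with %gN standing for whichever global is free.)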
 *
 * For the time being, we use 2 shifts and a simple "and" mask.
 * We shift left to clear the bits above the index, we shift right
 * to clear the bits below the index (sans the log2(4 or 8) bits)
 * and use a mask to clear the log2(4 or 8) bits.  We therefore
 * need to define 4 shift counts, all of which are relative to
 * PAGE_SHIFT.
 *
 * Although unsupportable for other reasons, this does mean that
 * 512K and 4MB page sizes would be generally supported by the
 * kernel.  (ELF binaries would break with > 64K PAGE_SIZE, since
 * their sections are only aligned that strongly.)
 *
 * The operations performed for extraction are thus:
 *
 *      ((X << FOO_SHIFT_LEFT) >> FOO_SHIFT_RIGHT) & ~0x3
 *
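 * As an illustrative check of the counts defined below, with a
 * PAGE_SHIFT of 13 they work out to:
 *
 *      PMD_SHIFT_LEFT    = 64 - D         = 64 - 24 = 40
 *      PMD_SHIFT_RIGHT   = 64 - (D-B) - 2 = 64 - 11 - 2 = 51
 *      PGDIR_SHIFT_LEFT  = 64 - F         = 64 - 35 = 29
 *      PGDIR_SHIFT_RIGHT = 64 - (F-D) - 2 = 64 - 11 - 2 = 51
 *
 * so the PMD extraction ((X << 40) >> 51) & ~0x3 leaves the eleven
 * PMD index bits (23:13 of X) at bits 12:2, a ready-made byte offset
 * into a table of 4-byte entries; the final mask clears the two
 * stray PTE-index bits that land at bits 1:0.
 *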
 */

#define A (3 + (PAGE_SHIFT - 3) - 1)
#define B (A + 1)
#define C (B + (PAGE_SHIFT - 2) - 1)
#define D (C + 1)
#define E (D + (PAGE_SHIFT - 2) - 1)
#define F (E + 1)

#define PMD_SHIFT_LEFT		(64 - D)
#define PMD_SHIFT_RIGHT		(64 - (D - B) - 2)
#define PGDIR_SHIFT_LEFT	(64 - F)
#define PGDIR_SHIFT_RIGHT	(64 - (F - D) - 2)
#define LOW_MASK_BITS		0x3

/* TLB1 ** ICACHE line 1: tl1 DTLB and quick VPTE miss	*/
	ldxa		[%g1 + %g1] ASI_DMMU, %g4	! Get TAG_ACCESS
	add		%g3, %g3, %g5			! Compute VPTE base
	cmp		%g4, %g5			! VPTE miss?
	bgeu,pt		%xcc, 1f			! Continue here
	 andcc		%g4, TAG_CONTEXT_BITS, %g5	! tl0 miss Nucleus test
	ba,a,pt		%xcc, from_tl1_trap		! Fall to tl0 miss
1:	sllx		%g6, VPTE_SHIFT, %g4		! Position TAG_ACCESS
	or		%g4, %g5, %g4			! Prepare TAG_ACCESS
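
/* (TAG_ACCESS sits at ASI_DMMU offset 0x30, exactly twice the 0x18
 *  of TLB_SFSR, which %g1 is expected to hold on entry; that is why
 *  "%g1 + %g1" above reaches TAG_ACCESS without loading a separate
 *  constant, and why ICACHE line 4 below restores %g1 to TLB_SFSR.)
 */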

/* TLB1 ** ICACHE line 2: Quick VPTE miss	  	*/
	mov		TSB_REG, %g1			! Grab TSB reg
	ldxa		[%g1] ASI_DMMU, %g5		! Doing PGD caching?
	sllx		%g6, PMD_SHIFT_LEFT, %g1	! Position PMD offset
	be,pn		%xcc, sparc64_vpte_nucleus	! Is it from Nucleus?
	 srlx		%g1, PMD_SHIFT_RIGHT, %g1	! Mask PMD offset bits
	brnz,pt		%g5, sparc64_vpte_continue	! Yep, go like smoke
	 andn		%g1, LOW_MASK_BITS, %g1		! Final PMD mask
	sllx		%g6, PGDIR_SHIFT_LEFT, %g5	! Position PGD offset

/* TLB1 ** ICACHE line 3: Quick VPTE miss	  	*/
	srlx		%g5, PGDIR_SHIFT_RIGHT, %g5	! Mask PGD offset bits
	andn		%g5, LOW_MASK_BITS, %g5		! Final PGD mask
	lduwa		[%g7 + %g5] ASI_PHYS_USE_EC, %g5! Load PGD
	brz,pn		%g5, vpte_noent			! Valid?
sparc64_kpte_continue:
	sllx		%g5, 11, %g5			! Shift into place
sparc64_vpte_continue:
	lduwa		[%g5 + %g1] ASI_PHYS_USE_EC, %g5! Load PMD
	sllx		%g5, 11, %g5			! Shift into place
	brz,pn		%g5, vpte_noent			! Valid?
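
/* (The 32-bit PGD and PMD words loaded above hold the next level's
 *  physical address shifted right by 11 so it fits in one word; the
 *  two "sllx %g5, 11" instructions reconstruct the full physical
 *  address before it is used.)
 */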

/* TLB1 ** ICACHE line 4: Quick VPTE miss	  	*/
	mov		(VALID_SZ_BITS >> 61), %g1	! upper vpte into %g1
	sllx		%g1, 61, %g1			! finish calc
	or		%g5, VPTE_BITS, %g5		! Prepare VPTE data
	or		%g5, %g1, %g5			! ...
	mov		TLB_SFSR, %g1			! Restore %g1 value
	stxa		%g5, [%g0] ASI_DTLB_DATA_IN	! Load VPTE into TLB
	stxa		%g4, [%g1 + %g1] ASI_DMMU	! Restore previous TAG_ACCESS
	retry						! Load PTE once again

#undef SZ_BITS
#undef VALID_SZ_BITS
#undef VPTE_SHIFT
#undef VPTE_BITS
#undef A
#undef B
#undef C
#undef D
#undef E
#undef F
#undef PMD_SHIFT_LEFT
#undef PMD_SHIFT_RIGHT
#undef PGDIR_SHIFT_LEFT
#undef PGDIR_SHIFT_RIGHT
#undef LOW_MASK_BITS