/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * arch/sh64/mm/cache.c
 *
 * Original version Copyright (C) 2000, 2001  Paolo Alberelli
 * Second version Copyright (C) benedict.gaster@superh.com 2002
 * Third version Copyright Richard.Curnow@superh.com 2003
 * Hacks to third version Copyright (C) 2003 Paul Mundt
 */

/****************************************************************************/

#include <linux/init.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/threads.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/tlb.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h> /* for flush_itlb_range */

#include <linux/proc_fs.h>

/* This function is in entry.S */
extern unsigned long switch_and_save_asid(unsigned long new_asid);

/* Wired TLB entry for the D-cache */
static unsigned long long dtlb_cache_slot;

/**
 * sh64_cache_init()
 *
 * This is pretty much just a straightforward clone of the SH
 * detect_cpu_and_cache_system().
 *
 * This function is responsible for setting up all of the cache
 * info dynamically as well as taking care of CPU probing and
 * setting up the relevant subtype data.
 *
 * FIXME: For the time being, we only really support the SH5-101
 * out of the box, and don't support dynamic probing for things
 * like the SH5-103 or even cut2 of the SH5-101. Implement this
 * later!
 */
int __init sh64_cache_init(void)
{
        /*
         * First, setup some sane values for the I-cache.
         */
        cpu_data->icache.ways           = 4;
        cpu_data->icache.sets           = 256;
        cpu_data->icache.linesz         = L1_CACHE_BYTES;

        /*
         * FIXME: This can probably be cleaned up a bit as well.. for example,
         * do we really need the way shift _and_ the way_step_shift ?? Judging
         * by the existing code, I would guess no.. is there any valid reason
         * why we need to be tracking this around?
         */
        cpu_data->icache.way_shift      = 13;
        cpu_data->icache.entry_shift    = 5;
        cpu_data->icache.set_shift      = 4;
        cpu_data->icache.way_step_shift = 16;
        cpu_data->icache.asid_shift     = 2;

        /*
         * way offset = cache size / associativity, so just don't factor in
         * associativity in the first place..
         */
        cpu_data->icache.way_ofs        = cpu_data->icache.sets *
                                          cpu_data->icache.linesz;

        cpu_data->icache.asid_mask      = 0x3fc;
        cpu_data->icache.idx_mask       = 0x1fe0;
        cpu_data->icache.epn_mask       = 0xffffe000;
        cpu_data->icache.flags          = 0;
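
        /*
         * For reference: 256 sets of 32-byte lines (entry_shift = 5) make
         * each way 8 KB, which is what way_shift = 13 encodes, and the 4
         * ways give a 32 KB cache; way_ofs above works out to that same
         * 8 KB per-way stride.
         */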

        /*
         * Next, setup some sane values for the D-cache.
         *
         * On the SH5, these are pretty consistent with the I-cache settings,
         * so we just copy over the existing definitions.. these can be fixed
         * up later, especially if we add runtime CPU probing.
         *
         * Though in the meantime it saves us from having to duplicate all of
         * the above definitions..
         */
        cpu_data->dcache                = cpu_data->icache;

        /*
         * Setup any cache-related flags here
         */
#if defined(CONFIG_DCACHE_WRITE_THROUGH)
        set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags));
#elif defined(CONFIG_DCACHE_WRITE_BACK)
        set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags));
#endif

        /*
         * We also need to reserve a slot for the D-cache in the DTLB, so we
         * do this now ..
         */
        dtlb_cache_slot                 = sh64_get_wired_dtlb_entry();

        return 0;
}

#ifdef CONFIG_DCACHE_DISABLED
#define sh64_dcache_purge_all()					do { } while (0)
#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr)	do { } while (0)
#define sh64_dcache_purge_user_range(mm, start, end)		do { } while (0)
#define sh64_dcache_purge_phy_page(paddr)			do { } while (0)
#define sh64_dcache_purge_virt_page(mm, eaddr)			do { } while (0)
#define sh64_dcache_purge_kernel_range(start, end)		do { } while (0)
#define sh64_dcache_wback_current_user_range(start, end)	do { } while (0)
#endif
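
/* With the D-cache configured out, the stubs above make every D-cache purge
   and writeback call in this file compile away to nothing; the do { } while (0)
   form keeps each stub usable as a single statement, e.g. in an un-braced
   if/else. */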

/*##########################################################################*/

/* From here onwards, a rewrite of the implementation,
   by Richard.Curnow@superh.com.

   The major changes in this compared to the old version are:
   1. use more selective purging through OCBP instead of using ALLOCO to purge
      by natural replacement.  This avoids purging out unrelated cache lines
      that happen to be in the same set.
   2. exploit the APIs copy_user_page and clear_user_page better
   3. be more selective about I-cache purging, in particular use invalidate_all
      more sparingly.

*/

/*##########################################################################
			SUPPORT FUNCTIONS
 ##########################################################################*/

/****************************************************************************/
/* The following group of functions deals with mapping and unmapping a temporary
   page into the DTLB slot that has been set aside for our exclusive use. */
/* In order to accomplish this, we use the generic interface for adding and
   removing a wired slot entry as defined in arch/sh64/mm/tlb.c */
/****************************************************************************/

static unsigned long slot_own_flags;

static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr)
{
        local_irq_save(slot_own_flags);
        sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
}

static inline void sh64_teardown_dtlb_cache_slot(void)
{
        sh64_teardown_tlb_slot(dtlb_cache_slot);
        local_irq_restore(slot_own_flags);
}
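
/* The two helpers above are always used as a bracket around a purge loop:
   sh64_setup_dtlb_cache_slot() wires a one-page mapping for the physical page
   of interest (IRQs are left disabled, with the flags saved in
   slot_own_flags), the caller then issues OCBP over that page, and
   sh64_teardown_dtlb_cache_slot() unwires the slot and restores IRQs -- see
   sh64_dcache_purge_coloured_phy_page() below for the typical pattern. */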

/****************************************************************************/

#ifndef CONFIG_ICACHE_DISABLED

static void __inline__ sh64_icache_inv_all(void)
{
        unsigned long long addr, flag, data;
        unsigned int flags;

        addr = ICCR0;
        flag = ICCR0_ICI;
        data = 0;

        /* Make this a critical section for safety (probably not strictly
           necessary.) */
        local_irq_save(flags);

        /* Without %1 it gets inexplicably wrong */
        asm volatile("getcfg	%3, 0, %0\n\t"
                     "or	%0, %2, %0\n\t"
                     "putcfg	%3, 0, %0\n\t"
                     "synci"
                     : "=&r" (data)
                     : "0" (data), "r" (flag), "r" (addr));

        local_irq_restore(flags);
}

static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
{
        /* Invalidate range of addresses [start,end] from the I-cache, where
           the addresses lie in the kernel superpage. */

        unsigned long long ullend, addr, aligned_start;
#if (NEFF == 32)
        aligned_start = (unsigned long long)(signed long long)(signed long) start;
#else
#error "NEFF != 32"
#endif
        aligned_start &= L1_CACHE_ALIGN_MASK;
        addr = aligned_start;
#if (NEFF == 32)
        ullend = (unsigned long long) (signed long long) (signed long) end;
#else
#error "NEFF != 32"
#endif
        while (addr <= ullend) {
                asm __volatile__ ("icbi %0, 0" : : "r" (addr));
                addr += L1_CACHE_BYTES;
        }
}

static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
{
        /* If we get called, we know that vma->vm_flags contains VM_EXEC.
           Also, eaddr is page-aligned. */

        unsigned long long addr, end_addr;
        unsigned long flags = 0;
        unsigned long running_asid, vma_asid;
        addr = eaddr;
        end_addr = addr + PAGE_SIZE;

        /* Check whether we can use the current ASID for the I-cache
           invalidation.  For example, if we're called via
           access_process_vm->flush_cache_page->here, (e.g. when reading from
           /proc), 'running_asid' will be that of the reader, not of the
           victim.

           Also, note the risk that we might get pre-empted between the ASID
           compare and blocking IRQs, and before we regain control, the
           pid->ASID mapping changes.  However, the whole cache will get
           invalidated when the mapping is renewed, so the worst that can
           happen is that the loop below ends up invalidating somebody else's
           cache entries.
        */

        running_asid = get_asid();
        vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK);
        if (running_asid != vma_asid) {
                local_irq_save(flags);
                switch_and_save_asid(vma_asid);
        }
        while (addr < end_addr) {
                /* Worth unrolling a little */
                asm __volatile__("icbi %0,  0" : : "r" (addr));
                asm __volatile__("icbi %0, 32" : : "r" (addr));
                asm __volatile__("icbi %0, 64" : : "r" (addr));
                asm __volatile__("icbi %0, 96" : : "r" (addr));
                addr += 128;
        }
        if (running_asid != vma_asid) {
                switch_and_save_asid(running_asid);
                local_irq_restore(flags);
        }
}

/****************************************************************************/

static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
                        unsigned long start, unsigned long end)
{
        /* Used for invalidating big chunks of I-cache, i.e. assume the range
           is whole pages.  If 'start' or 'end' is not page aligned, the code
           is conservative and invalidates to the ends of the enclosing pages.
           This is functionally OK, just a performance loss. */

        /* See the comments below in sh64_dcache_purge_user_range() regarding
           the choice of algorithm.  However, for the I-cache, option (2) isn't
           available because there are no physical tags so aliases can't be
           resolved.  The icbi instruction has to be used through the user
           mapping.  Because icbi is cheaper than ocbp on a cache hit, the
           selective loop stays worthwhile for larger ranges than it does with
           the D-cache.  Just assume 64 pages for now as a working figure.
        */

        int n_pages;

        if (!mm)
                return;

        n_pages = ((end - start) >> PAGE_SHIFT);
        if (n_pages >= 64) {
                sh64_icache_inv_all();
        } else {
                unsigned long aligned_start;
                unsigned long eaddr;
                unsigned long after_last_page_start;
                unsigned long mm_asid, current_asid;
                unsigned long long flags = 0ULL;

                mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
                current_asid = get_asid();

                if (mm_asid != current_asid) {
                        /* Switch ASID and run the invalidate loop under cli */
                        local_irq_save(flags);
                        switch_and_save_asid(mm_asid);
                }

                aligned_start = start & PAGE_MASK;
                after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);

                while (aligned_start < after_last_page_start) {
                        struct vm_area_struct *vma;
                        unsigned long vma_end;
                        vma = find_vma(mm, aligned_start);
                        if (!vma || (aligned_start < vma->vm_start)) {
                                /* Hole in the address space, or no VMA at all:
                                   step on a page at a time to avoid getting
                                   stuck in an error condition */
                                aligned_start += PAGE_SIZE;
                                continue;
                        }
                        vma_end = vma->vm_end;
                        if (vma->vm_flags & VM_EXEC) {
                                /* Executable */
                                eaddr = aligned_start;
                                while (eaddr < vma_end) {
                                        sh64_icache_inv_user_page(vma, eaddr);
                                        eaddr += PAGE_SIZE;
                                }
                        }
                        aligned_start = vma->vm_end; /* Skip to start of next region */
                }
                if (mm_asid != current_asid) {
                        switch_and_save_asid(current_asid);
                        local_irq_restore(flags);
                }
        }
}

static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
                        unsigned long start, int len)
{
        /* Invalidate a small range of user context I-cache, not necessarily
           page (or even cache-line) aligned. */

        unsigned long long eaddr = start;
        unsigned long long eaddr_end = start + len;
        unsigned long current_asid, mm_asid;
        unsigned long long flags;
        unsigned long long epage_start;

        /* Since this is used inside ptrace, the ASID in the mm context
           typically won't match current_asid.  We'll have to switch ASID to do
           this.  For safety, and given that the range will be small, do all
           this under cli.

           Note, there is a hazard that the ASID in mm->context is no longer
           actually associated with mm, i.e. if the mm->context has started a
           new cycle since mm was last active.  However, this is just a
           performance issue: all that happens is that we invalidate lines
           belonging to another mm, so the owning process has to refill them
           when that mm goes live again.  mm itself can't have any cache
           entries because there will have been a flush_cache_all when the new
           mm->context cycle started. */

        /* Align to start of cache line.  Otherwise, suppose len==8 and start
           was at 32N+28 : the last 4 bytes wouldn't get invalidated. */
        eaddr = start & L1_CACHE_ALIGN_MASK;
        eaddr_end = start + len;

        local_irq_save(flags);
        mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
        current_asid = switch_and_save_asid(mm_asid);

        epage_start = eaddr & PAGE_MASK;

        while (eaddr < eaddr_end) {
                asm __volatile__("icbi %0, 0" : : "r" (eaddr));
                eaddr += L1_CACHE_BYTES;
        }
        switch_and_save_asid(current_asid);
        local_irq_restore(flags);
}

static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
{
        /* The icbi instruction never raises ITLBMISS.  i.e. if there's not a
           cache hit on the virtual tag the instruction ends there, without a
           TLB lookup. */

        unsigned long long aligned_start;
        unsigned long long ull_end;
        unsigned long long addr;

        ull_end = end;

        /* Just invalidate over the range using the natural addresses.  TLB
           miss handling will be OK (TBC).  Since it's for the current process,
           either we're already in the right ASID context, or the ASIDs have
           been recycled since we were last active in which case we might just
           invalidate another process's I-cache entries : no worries, just a
           performance drop for him. */
        aligned_start = start & L1_CACHE_ALIGN_MASK;
        addr = aligned_start;
        while (addr < ull_end) {
                asm __volatile__ ("icbi %0, 0" : : "r" (addr));
                asm __volatile__ ("nop");
                asm __volatile__ ("nop");
                addr += L1_CACHE_BYTES;
        }
}

#endif /* !CONFIG_ICACHE_DISABLED */

/****************************************************************************/

#ifndef CONFIG_DCACHE_DISABLED

/* Buffer used as the target of alloco instructions to purge data from cache
   sets by natural eviction. -- RPC */
#define DUMMY_ALLOCO_AREA_SIZE (L1_CACHE_SIZE_BYTES + (1024 * 4))
static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };
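
/* sh64_dcache_purge_sets() below uses this buffer as its alloco target: for
   each set to be purged it allocates one line per way at the matching set
   index inside the buffer, so a write-back cache evicts (and writes back)
   whatever was resident there.  The size is a full cache's worth of lines
   plus 4 KB of slack over the exact requirement. */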

/****************************************************************************/

static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
{
        /* Purge all ways in a particular block of sets, specified by the base
           set number and number of sets.  Can handle wrap-around, if that's
           needed.  */

        int dummy_buffer_base_set;
        unsigned long long eaddr, eaddr0, eaddr1;
        int j;
        int set_offset;

        dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift;
        set_offset = sets_to_purge_base - dummy_buffer_base_set;

        for (j = 0; j < n_sets; j++, set_offset++) {
                set_offset &= (cpu_data->dcache.sets - 1);
                eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift);

                /* Do one alloco which hits the required set per cache way.  For
                   write-back mode, this will purge the #ways resident lines.  There's
                   little point unrolling this loop because the allocos stall more if
                   they're too close together. */
                eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
                for (eaddr = eaddr0; eaddr < eaddr1; eaddr += cpu_data->dcache.way_ofs) {
                        asm __volatile__ ("alloco %0, 0" : : "r" (eaddr));
                        asm __volatile__ ("synco"); /* TAKum03020 */
                }

                eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
                for (eaddr = eaddr0; eaddr < eaddr1; eaddr += cpu_data->dcache.way_ofs) {
                        /* Load from each address.  Required because alloco is a NOP if
                           the cache is write-through.  Write-through is a config option. */
                        if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
                                *(volatile unsigned char *)(int)eaddr;
                }
        }

        /* Don't use OCBI to invalidate the lines.  That costs cycles directly.
           If the dummy block is just left resident, it will naturally get
           evicted as required.  */

        return;
}
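
/* Worked example with the geometry set up in sh64_cache_init(): way_ofs is
   8 KB, so for each set the first inner loop above touches 4 addresses at
   8 KB strides.  All 4 index the same set, so allocating them claims all 4
   ways of that set; the second loop's dummy loads only matter in the
   write-through configuration, where alloco is a no-op and the load is what
   displaces the resident line. */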

/****************************************************************************/

static void sh64_dcache_purge_all(void)
{
        /* Purge the entire contents of the dcache.  The most efficient way to
           achieve this is to use alloco instructions on a region of unused
           memory equal in size to the cache, thereby causing the current
           contents to be discarded by natural eviction.  The alternative,
           namely reading every tag, setting up a mapping for the corresponding
           page and doing an OCBP for the line, would be much more expensive.
        */

        sh64_dcache_purge_sets(0, cpu_data->dcache.sets);

        return;
}

/****************************************************************************/

static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end)
{
        /* Purge the range of addresses [start,end] from the D-cache.  The
           addresses lie in the superpage mapping.  There's no harm if we
           overpurge at either end - just a small performance loss. */
        unsigned long long ullend, addr, aligned_start;
#if (NEFF == 32)
        aligned_start = (unsigned long long)(signed long long)(signed long) start;
#else
#error "NEFF != 32"
#endif
        aligned_start &= L1_CACHE_ALIGN_MASK;
        addr = aligned_start;
#if (NEFF == 32)
        ullend = (unsigned long long) (signed long long) (signed long) end;
#else
#error "NEFF != 32"
#endif
        while (addr <= ullend) {
                asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
                addr += L1_CACHE_BYTES;
        }
        return;
}

/* Assumes this address and the (2**n_synbits) pages up from it aren't used
   for anything else in the kernel */
#define MAGIC_PAGE0_START 0xffffffffec000000ULL

static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr)
{
        /* Purge the physical page 'paddr' from the cache.  It's known that any
           cache lines requiring attention have the same page colour as the
           address 'eaddr'.

           This relies on the fact that the D-cache matches on physical tags
           when no virtual tag matches.  So we create an alias for the original
           page and purge through that.  (Alternatively, we could have done
           this by switching ASID to match the original mapping and purged
           through that, but that involves ASID switching cost + probably a
           TLBMISS + refill anyway.)
        */

        unsigned long long magic_page_start;
        unsigned long long magic_eaddr, magic_eaddr_end;

        magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);

        /* As long as the kernel is not pre-emptible, this doesn't need to be
           under cli/sti. */

        sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);

        magic_eaddr = magic_page_start;
        magic_eaddr_end = magic_eaddr + PAGE_SIZE;
        while (magic_eaddr < magic_eaddr_end) {
                /* Little point in unrolling this loop - the OCBPs are blocking
                   and won't go any quicker (i.e. the loop overhead is parallel
                   to part of the OCBP execution.) */
                asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
                magic_eaddr += L1_CACHE_BYTES;
        }

        sh64_teardown_dtlb_cache_slot();
}
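
/* Note the contrast with sh64_dcache_purge_phy_page() below: here the colour
   of any aliased lines is known from 'eaddr', so only one synonym position of
   the page has to be purged, whereas the function below must walk all
   (1 << CACHE_OC_N_SYNBITS) possible colours. */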

/****************************************************************************/

static void sh64_dcache_purge_phy_page(unsigned long paddr)
{
        /* Purge a page given its physical start address, by creating a
           temporary 1 page mapping and purging across that.  Even if we know
           the virtual address (& vma or mm) of the page, the method here is
           more elegant because it avoids issues of coping with page faults on
           the purge instructions (i.e. no special-case code required in the
           critical path in the TLB miss handling). */

        unsigned long long eaddr_start, eaddr, eaddr_end;
        int i;

        /* As long as the kernel is not pre-emptible, this doesn't need to be
           under cli/sti. */

        eaddr_start = MAGIC_PAGE0_START;
        for (i = 0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
                sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);

                eaddr = eaddr_start;
                eaddr_end = eaddr + PAGE_SIZE;
                while (eaddr < eaddr_end) {
                        asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
                        eaddr += L1_CACHE_BYTES;
                }

                sh64_teardown_dtlb_cache_slot();
                eaddr_start += PAGE_SIZE;
        }
}

static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
                                unsigned long addr, unsigned long end)
{
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;
        pte_t entry;
        spinlock_t *ptl;
        unsigned long paddr;

        if (!mm)
                return; /* No way to find physical address of page */

        pgd = pgd_offset(mm, addr);
        if (pgd_bad(*pgd))
                return;

        pmd = pmd_offset(pgd, addr);
        if (pmd_none(*pmd) || pmd_bad(*pmd))
                return;

        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        do {
                entry = *pte;
                if (pte_none(entry) || !pte_present(entry))
                        continue;
                paddr = pte_val(entry) & PAGE_MASK;
                sh64_dcache_purge_coloured_phy_page(paddr, addr);
        } while (pte++, addr += PAGE_SIZE, addr != end);
        pte_unmap_unlock(pte - 1, ptl);
}
/****************************************************************************/

static void sh64_dcache_purge_user_range(struct mm_struct *mm,
                          unsigned long start, unsigned long end)
{
        /* There are at least 5 choices for the implementation of this, with
           pros (+), cons(-), comments(*):

           1. ocbp each line in the range through the original user's ASID
              + no lines spuriously evicted
              - tlbmiss handling (must either handle faults on demand => extra
                special-case code in tlbmiss critical path), or map the page in
                advance (=> flush_tlb_range in advance to avoid multiple hits)
              - ASID switching
              - expensive for large ranges

           2. temporarily map each page in the range to a special effective
              address and ocbp through the temporary mapping; relies on the
              fact that SH-5 OCB* always do TLB lookup and match on ptags (they
              never look at the etags)
              + no spurious evictions
              - expensive for large ranges
              * surely cheaper than (1)

           3. walk all the lines in the cache, check the tags, if a match
              occurs create a page mapping to ocbp the line through
              + no spurious evictions
              - tag inspection overhead
              - (especially for small ranges)
              - potential cost of setting up/tearing down page mapping for
                every line that matches the range
              * cost partly independent of range size

           4. walk all the lines in the cache, check the tags, if a match
              occurs use 4 * alloco to purge the line (+3 other probably
              innocent victims) by natural eviction
              + no tlb mapping overheads
              - spurious evictions
              - tag inspection overhead

           5. implement like flush_cache_all
              + no tag inspection overhead
              - spurious evictions
              - bad for small ranges

           (1) can be ruled out as more expensive than (2).  (2) appears best
           for small ranges.  The choice between (3), (4) and (5) for large
           ranges and the range size for the large/small boundary need
           benchmarking to determine.

           For now use approach (2) for small ranges and (5) for large ones.
        */

        int n_pages;

        n_pages = ((end - start) >> PAGE_SHIFT);
        if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) {
#if 1
                sh64_dcache_purge_all();
#else
                unsigned long long set, way;
                unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
                for (set = 0; set < cpu_data->dcache.sets; set++) {
                        unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift);
                        for (way = 0; way < cpu_data->dcache.ways; way++) {
                                unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift);
                                unsigned long long tag0;
                                unsigned long line_valid;

                                asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr));
                                line_valid = tag0 & SH_CACHE_VALID;
                                if (line_valid) {
                                        unsigned long cache_asid;
                                        unsigned long epn;

                                        cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift;
                                        /* The next line needs some
                                           explanation.  The virtual tags
                                           encode bits [31:13] of the virtual
                                           address, bit [12] of the 'tag' being
                                           implied by the cache set index. */
                                        epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift);

                                        if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) {
                                                /* TODO : could optimise this
                                                   call by batching multiple
                                                   adjacent sets together. */
                                                sh64_dcache_purge_sets(set, 1);
                                                break; /* Don't waste time inspecting other ways for this set */
                                        }
                                }
                        }
                }
#endif
        } else {
                /* Small range, covered by a single page table page */
                start &= PAGE_MASK;     /* should already be so */
                end = PAGE_ALIGN(end);  /* should already be so */
                sh64_dcache_purge_user_pages(mm, start, end);
        }
        return;
}

static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end)
{
        unsigned long long aligned_start;
        unsigned long long ull_end;
        unsigned long long addr;

        ull_end = end;

        /* Just wback over the range using the natural addresses.  TLB miss
           handling will be OK (TBC) : the range has just been written to by
           the signal frame setup code, so the PTEs must exist.

           Note, if we have CONFIG_PREEMPT and get preempted inside this loop,
           it doesn't matter, even if the pid->ASID mapping changes whilst
           we're away.  In that case the cache will have been flushed when the
           mapping was renewed.  So the writebacks below will be nugatory (and
           we'll doubtless have to fault the TLB entry/ies in again with the
           new ASID), but it's a rare case.
        */
        aligned_start = start & L1_CACHE_ALIGN_MASK;
        addr = aligned_start;
        while (addr < ull_end) {
                asm __volatile__ ("ocbwb %0, 0" : : "r" (addr));
                addr += L1_CACHE_BYTES;
        }
}

/****************************************************************************/

/* These *MUST* lie in an area of virtual address space that's otherwise unused. */
#define UNIQUE_EADDR_START 0xe0000000UL
#define UNIQUE_EADDR_END   0xe8000000UL

static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr)
{
        /* Given a physical address paddr, and a user virtual address
           user_eaddr which will eventually be mapped to it, create a one-off
           kernel-private eaddr mapped to the same paddr.  This is used for
           creating special destination pages for copy_user_page and
           clear_user_page */

        static unsigned long current_pointer = UNIQUE_EADDR_START;
        unsigned long coloured_pointer;

        if (current_pointer == UNIQUE_EADDR_END) {
                sh64_dcache_purge_all();
                current_pointer = UNIQUE_EADDR_START;
        }

        coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK);
        sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);

        current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);

        return coloured_pointer;
}
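
/* The colouring arithmetic above keeps the synonym bits of 'user_eaddr' (the
   CACHE_OC_SYN_MASK bits) in the kernel-private alias, so writes done through
   the alias land in the same cache sets that the eventual user mapping will
   index.  The caller is expected to pair this with
   sh64_teardown_dtlb_cache_slot(), as the two helpers below do. */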

/****************************************************************************/

static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
{
        void *coloured_to;

        /* Discard any existing cache entries of the wrong colour.  These are
           present quite often, if the kernel has recently used the page
           internally, then given it up, then it's been allocated to the user.
        */
        sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

        coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
        sh64_page_copy(from, coloured_to);

        sh64_teardown_dtlb_cache_slot();
}

static void sh64_clear_user_page_coloured(void *to, unsigned long address)
{
        void *coloured_to;

        /* Discard any existing kernel-originated lines of the wrong colour (as
           above) */
        sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

        coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
        sh64_page_clear(coloured_to);

        sh64_teardown_dtlb_cache_slot();
}

#endif /* !CONFIG_DCACHE_DISABLED */

/****************************************************************************/

/*##########################################################################
			EXTERNALLY CALLABLE API.
 ##########################################################################*/

/* These functions are described in Documentation/cachetlb.txt.
   Each one of these functions varies in behaviour depending on whether the
   I-cache and/or D-cache are configured out.

   Note that the Linux term 'flush' corresponds to what is termed 'purge' in
   the sh/sh64 jargon for the D-cache, i.e. write back dirty data then
   invalidate the cache lines, and 'invalidate' for the I-cache.
*/
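
/* In terms of the SH-5 instructions used in this file: ocbp purges a D-cache
   line (write back if dirty, then invalidate), ocbwb writes a line back
   without invalidating it, icbi invalidates an I-cache line, and alloco
   allocates a line, which is what sh64_dcache_purge_sets() uses to force
   eviction. */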

#undef FLUSH_TRACE

void flush_cache_all(void)
{
        /* Invalidate the entire contents of both caches, after writing back to
           memory any dirty data from the D-cache. */
        sh64_dcache_purge_all();
        sh64_icache_inv_all();
}

/****************************************************************************/

void flush_cache_mm(struct mm_struct *mm)
{
        /* Invalidate an entire user-address space from both caches, after
           writing back dirty data (e.g. for shared mmap etc). */

        /* This could be coded selectively by inspecting all the tags then
           doing 4*alloco on any set containing a match (as for
           flush_cache_range), but fork/exit/execve (where this is called from)
           are expensive anyway. */

        /* Have to do a purge here, despite the comments re I-cache below.
           There could be odd-coloured dirty data associated with the mm still
           in the cache - if this gets written out through natural eviction
           after the kernel has reused the page there will be chaos.
        */

        sh64_dcache_purge_all();

        /* The mm being torn down won't ever be active again, so any I-cache
           lines tagged with its ASID won't be visible for the rest of the
           lifetime of this ASID cycle.  Before the ASID gets reused, there
           will be a flush_cache_all.  Hence we don't need to touch the
           I-cache.  This is similar to the lack of action needed in
           flush_tlb_mm - see fault.c. */
}

/****************************************************************************/

void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
                       unsigned long end)
{
        struct mm_struct *mm = vma->vm_mm;

        /* Invalidate (from both caches) the range [start,end) of virtual
           addresses from the user address space specified by mm, after writing
           back any dirty data.

           Note, 'end' is 1 byte beyond the end of the range to flush. */

        sh64_dcache_purge_user_range(mm, start, end);
        sh64_icache_inv_user_page_range(mm, start, end);
}

/****************************************************************************/

void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn)
{
        /* Invalidate any entries in either cache for the vma within the user
           address space vma->vm_mm for the page starting at virtual address
           'eaddr'.  This seems to be used primarily in breaking COW.  Note,
           the I-cache must be searched too in case the page in question is
           both writable and being executed from (e.g. stack trampolines.)

           Note, this is called with pte lock held.
        */

        sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);

        if (vma->vm_flags & VM_EXEC) {
                sh64_icache_inv_user_page(vma, eaddr);
        }
}

/****************************************************************************/

#ifndef CONFIG_DCACHE_DISABLED

void copy_user_page(void *to, void *from, unsigned long address, struct page *page)
{
        /* 'from' and 'to' are kernel virtual addresses (within the superpage
           mapping of the physical RAM).  'address' is the user virtual address
           where the copy 'to' will be mapped after.  This allows a custom
           mapping to be used to ensure that the new copy is placed in the
           right cache sets for the user to see it without having to bounce it
           out via memory.  Note however : the call to flush_page_to_ram in
           (generic)/mm/memory.c:(break_cow) undoes all this good work in that one
           very important case!

           TBD : can we guarantee that on every call, any cache entries for
           'from' are in the same colour sets as 'address' also?  i.e. is this
           always used just to deal with COW?  (I suspect not). */

        /* There are two possibilities here for when the page 'from' was last accessed:
           * by the kernel : this is OK, no purge required.
           * by the/a user (e.g. for break_COW) : need to purge.

           If the potential user mapping at 'address' is the same colour as
           'from' there is no need to purge any cache lines from the 'from'
           page mapped into cache sets of colour 'address'.  (The copy will be
           accessing the page through 'from').
        */

        if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) {
                sh64_dcache_purge_coloured_phy_page(__pa(from), address);
        }

        if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
                /* No synonym problem on destination */
                sh64_page_copy(from, to);
        } else {
                sh64_copy_user_page_coloured(to, from, address);
        }

        /* Note, don't need to flush 'from' page from the cache again - it's
           done anyway by the generic code */
}

void clear_user_page(void *to, unsigned long address, struct page *page)
{
        /* 'to' is a kernel virtual address (within the superpage
           mapping of the physical RAM).  'address' is the user virtual address
           where the 'to' page will be mapped after.  This allows a custom
           mapping to be used to ensure that the new copy is placed in the
           right cache sets for the user to see it without having to bounce it
           out via memory.
        */

        if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
                /* No synonym problem on destination */
                sh64_page_clear(to);
        } else {
                sh64_clear_user_page_coloured(to, address);
        }
}

#endif /* !CONFIG_DCACHE_DISABLED */

/****************************************************************************/

void flush_dcache_page(struct page *page)
{
        sh64_dcache_purge_phy_page(page_to_phys(page));
        wmb();
}

/****************************************************************************/

void flush_icache_range(unsigned long start, unsigned long end)
{
        /* Flush the range [start,end] of kernel virtual address space from
           the I-cache.  The corresponding range must be purged from the
           D-cache also because the SH-5 doesn't have cache snooping between
           the caches.  The addresses will be visible through the superpage
           mapping, therefore it's guaranteed that there are no cache entries
           for the range in cache sets of the wrong colour.

           Primarily used for cohering the I-cache after a module has
           been loaded.  */

        /* We also make sure to purge the same range from the D-cache since
           flush_page_to_ram() won't be doing this for us! */

        sh64_dcache_purge_kernel_range(start, end);
        wmb();
        sh64_icache_inv_kernel_range(start, end);
}

/****************************************************************************/

void flush_icache_user_range(struct vm_area_struct *vma,
                        struct page *page, unsigned long addr, int len)
{
        /* Flush the range of user (defined by vma->vm_mm) address space
           starting at 'addr' for 'len' bytes from the cache.  The range does
           not straddle a page boundary; the unique physical page containing
           the range is 'page'.  This seems to be used mainly for invalidating
           an address range following a poke into the program text through the
           ptrace() call from another process (e.g. for BRK instruction
           insertion). */

        sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
        mb();

        if (vma->vm_flags & VM_EXEC) {
                sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
        }
}

/*##########################################################################
			ARCH/SH64 PRIVATE CALLABLE API.
 ##########################################################################*/

void flush_cache_sigtramp(unsigned long start, unsigned long end)
{
        /* For the address range [start,end), write back the data from the
           D-cache and invalidate the corresponding region of the I-cache for
           the current process.  Used to flush signal trampolines on the stack
           to make them executable. */

        sh64_dcache_wback_current_user_range(start, end);
        wmb();
        sh64_icache_inv_current_user_range(start, end);
}