/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 *
 * This code maintains the "home" for each page in the system.
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/bootmem.h>
#include <linux/rmap.h>
#include <linux/pagemap.h>
#include <linux/mutex.h>
#include <linux/interrupt.h>
#include <linux/sysctl.h>
#include <linux/pagevec.h>
#include <linux/ptrace.h>
#include <linux/timex.h>
#include <linux/cache.h>
#include <linux/smp.h>

#include <asm/page.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/homecache.h>

#include "migrate.h"

#if CHIP_HAS_COHERENT_LOCAL_CACHE()

/*
 * The noallocl2 option suppresses all use of the L2 cache to cache
 * locally from a remote home.  There's no point in using it if we
 * don't have coherent local caching, though.
 */
static int __write_once noallocl2;
static int __init set_noallocl2(char *str)
{
	noallocl2 = 1;
	return 0;
}
early_param("noallocl2", set_noallocl2);

#else

#define noallocl2 0

#endif
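
/*
 * Illustrative note (not from the original source): booting with
 * "noallocl2" on the kernel command line invokes the early_param()
 * handler above and sets noallocl2 = 1; on chips with the no-alloc
 * bits, pte_set_home() below then sets the no-alloc-L2 bit on the
 * PTEs it constructs.
 */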

/* Provide no-op versions of these routines to keep flush_remote() cleaner. */
#define mark_caches_evicted_start() 0
#define mark_caches_evicted_finish(mask, timestamp) do {} while (0)


/*
 * Update the irq_stat for cpus that we are going to interrupt
 * with TLB or cache flushes.  Also handle removing dataplane cpus
 * from the TLB flush set, and setting dataplane_tlb_state instead.
 */
static void hv_flush_update(const struct cpumask *cache_cpumask,
			    struct cpumask *tlb_cpumask,
			    unsigned long tlb_va, unsigned long tlb_length,
			    HV_Remote_ASID *asids, int asidcount)
{
	struct cpumask mask;
	int i, cpu;

	cpumask_clear(&mask);
	if (cache_cpumask)
		cpumask_or(&mask, &mask, cache_cpumask);
	if (tlb_cpumask && tlb_length)
		cpumask_or(&mask, &mask, tlb_cpumask);

	for (i = 0; i < asidcount; ++i)
		cpumask_set_cpu(asids[i].y * smp_width + asids[i].x, &mask);

	/*
	 * Don't bother to update atomically; losing a count
	 * here is not that critical.
	 */
	for_each_cpu(cpu, &mask)
		++per_cpu(irq_stat, cpu).irq_hv_flush_count;
}

/*
 * This wrapper function around hv_flush_remote() does several things:
 *
 *  - Provides a return value error-checking panic path, since
 *    there's never any good reason for hv_flush_remote() to fail.
 *  - Accepts a 32-bit PFN rather than a 64-bit PA, which generally
 *    is the type that Linux wants to pass around anyway.
 *  - Centralizes the mark_caches_evicted() handling.
 *  - Canonicalizes that lengths of zero make cpumasks NULL.
 *  - Handles deferring TLB flushes for dataplane tiles.
 *  - Tracks remote interrupts in the per-cpu irq_cpustat_t.
 *
 * Note that we have to wait until the cache flush completes before
 * updating the per-cpu last_cache_flush word, since otherwise another
 * concurrent flush can race, conclude the flush has already
 * completed, and start to use the page while it's still dirty
 * remotely (running concurrently with the actual evict, presumably).
 */
void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
		  const struct cpumask *cache_cpumask_orig,
		  HV_VirtAddr tlb_va, unsigned long tlb_length,
		  unsigned long tlb_pgsize,
		  const struct cpumask *tlb_cpumask_orig,
		  HV_Remote_ASID *asids, int asidcount)
{
	int rc;
	int timestamp = 0; /* happy compiler */
	struct cpumask cache_cpumask_copy, tlb_cpumask_copy;
	struct cpumask *cache_cpumask, *tlb_cpumask;
	HV_PhysAddr cache_pa;
	char cache_buf[NR_CPUS*5], tlb_buf[NR_CPUS*5];

	mb();   /* provided just to simplify "magic hypervisor" mode */

	/*
	 * Canonicalize and copy the cpumasks.
	 */
	if (cache_cpumask_orig && cache_control) {
		cpumask_copy(&cache_cpumask_copy, cache_cpumask_orig);
		cache_cpumask = &cache_cpumask_copy;
	} else {
		cpumask_clear(&cache_cpumask_copy);
		cache_cpumask = NULL;
	}
	if (cache_cpumask == NULL)
		cache_control = 0;
	if (tlb_cpumask_orig && tlb_length) {
		cpumask_copy(&tlb_cpumask_copy, tlb_cpumask_orig);
		tlb_cpumask = &tlb_cpumask_copy;
	} else {
		cpumask_clear(&tlb_cpumask_copy);
		tlb_cpumask = NULL;
	}

	hv_flush_update(cache_cpumask, tlb_cpumask, tlb_va, tlb_length,
			asids, asidcount);
	cache_pa = (HV_PhysAddr)cache_pfn << PAGE_SHIFT;
	if (cache_control & HV_FLUSH_EVICT_L2)
		timestamp = mark_caches_evicted_start();
	rc = hv_flush_remote(cache_pa, cache_control,
			     cpumask_bits(cache_cpumask),
			     tlb_va, tlb_length, tlb_pgsize,
			     cpumask_bits(tlb_cpumask),
			     asids, asidcount);
	if (cache_control & HV_FLUSH_EVICT_L2)
		mark_caches_evicted_finish(cache_cpumask, timestamp);
	if (rc == 0)
		return;
	cpumask_scnprintf(cache_buf, sizeof(cache_buf), &cache_cpumask_copy);
	cpumask_scnprintf(tlb_buf, sizeof(tlb_buf), &tlb_cpumask_copy);

	pr_err("hv_flush_remote(%#llx, %#lx, %p [%s],"
	       " %#lx, %#lx, %#lx, %p [%s], %p, %d) = %d\n",
	       cache_pa, cache_control, cache_cpumask, cache_buf,
	       (unsigned long)tlb_va, tlb_length, tlb_pgsize,
	       tlb_cpumask, tlb_buf,
	       asids, asidcount, rc);
	panic("Unsafe to continue.");
}

void homecache_evict(const struct cpumask *mask)
{
	flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0);
}
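
/*
 * Illustrative use (not from the original file): flush the L2 caches
 * of all currently online cpus:
 *
 *	homecache_evict(cpu_online_mask);
 */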

/* Return a mask of the cpus whose caches currently own these pages. */
static void homecache_mask(struct page *page, int pages,
			   struct cpumask *home_mask)
{
	int i;
	cpumask_clear(home_mask);
	for (i = 0; i < pages; ++i) {
		int home = page_home(&page[i]);
		if (home == PAGE_HOME_IMMUTABLE ||
		    home == PAGE_HOME_INCOHERENT) {
			cpumask_copy(home_mask, cpu_possible_mask);
			return;
		}
#if CHIP_HAS_CBOX_HOME_MAP()
		if (home == PAGE_HOME_HASH) {
			cpumask_or(home_mask, home_mask, &hash_for_home_map);
			continue;
		}
#endif
		if (home == PAGE_HOME_UNCACHED)
			continue;
		BUG_ON(home < 0 || home >= NR_CPUS);
		cpumask_set_cpu(home, home_mask);
	}
}

/*
 * Return HV_FLUSH_EVICT_L2 if the length is large enough that we
 * believe we should just evict the whole L2 cache; otherwise return
 * the passed length unchanged.
 */
static unsigned long cache_flush_length(unsigned long length)
{
	return (length >= CHIP_L2_CACHE_SIZE()) ? HV_FLUSH_EVICT_L2 : length;
}

/* On the simulator, confirm lines have been evicted everywhere. */
static void validate_lines_evicted(unsigned long pfn, size_t length)
{
	sim_syscall(SIM_SYSCALL_VALIDATE_LINES_EVICTED,
		    (HV_PhysAddr)pfn << PAGE_SHIFT, length);
}

/* Flush a page out of whatever cache(s) it is in. */
void homecache_flush_cache(struct page *page, int order)
{
	int pages = 1 << order;
	int length = cache_flush_length(pages * PAGE_SIZE);
	unsigned long pfn = page_to_pfn(page);
	struct cpumask home_mask;

	homecache_mask(page, pages, &home_mask);
	flush_remote(pfn, length, &home_mask, 0, 0, 0, NULL, NULL, 0);
	validate_lines_evicted(pfn, pages * PAGE_SIZE);
}
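
/*
 * Illustrative use (not from the original file): flush a single page
 * out of any caches that may currently own it:
 *
 *	homecache_flush_cache(page, 0);
 */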

/* Report the home corresponding to a given PTE. */
static int pte_to_home(pte_t pte)
{
	if (hv_pte_get_nc(pte))
		return PAGE_HOME_IMMUTABLE;
	switch (hv_pte_get_mode(pte)) {
	case HV_PTE_MODE_CACHE_TILE_L3:
		return get_remote_cache_cpu(pte);
	case HV_PTE_MODE_CACHE_NO_L3:
		return PAGE_HOME_INCOHERENT;
	case HV_PTE_MODE_UNCACHED:
		return PAGE_HOME_UNCACHED;
#if CHIP_HAS_CBOX_HOME_MAP()
	case HV_PTE_MODE_CACHE_HASH_L3:
		return PAGE_HOME_HASH;
#endif
	}
	panic("Bad PTE %#llx\n", pte.val);
}

/* Update the home of a PTE if necessary (can also be used for a pgprot_t). */
pte_t pte_set_home(pte_t pte, int home)
{
	/* Check for non-linear file mapping "PTEs" and pass them through. */
	if (pte_file(pte))
		return pte;

#if CHIP_HAS_MMIO()
	/* Check for MMIO mappings and pass them through. */
	if (hv_pte_get_mode(pte) == HV_PTE_MODE_MMIO)
		return pte;
#endif

	/*
	 * Only immutable pages get NC mappings.  If we have a
	 * non-coherent PTE, but the underlying page is not
	 * immutable, it's likely the result of a forced
	 * caching setting running up against ptrace setting
	 * the page to be writable underneath.  In this case,
	 * just keep the PTE coherent.
	 */
	if (hv_pte_get_nc(pte) && home != PAGE_HOME_IMMUTABLE) {
		pte = hv_pte_clear_nc(pte);
		pr_err("non-immutable page incoherently referenced: %#llx\n",
		       pte.val);
	}

	switch (home) {

	case PAGE_HOME_UNCACHED:
		pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED);
		break;

	case PAGE_HOME_INCOHERENT:
		pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3);
		break;

	case PAGE_HOME_IMMUTABLE:
		/*
		 * We could home this page anywhere, since it's immutable,
		 * but by default just home it to follow "hash_default".
		 */
		BUG_ON(hv_pte_get_writable(pte));
		if (pte_get_forcecache(pte)) {
			/* Upgrade "force any cpu" to "No L3" for immutable. */
			if (hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_TILE_L3
			    && pte_get_anyhome(pte)) {
				pte = hv_pte_set_mode(pte,
						      HV_PTE_MODE_CACHE_NO_L3);
			}
		} else
#if CHIP_HAS_CBOX_HOME_MAP()
		if (hash_default)
			pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3);
		else
#endif
			pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3);
		pte = hv_pte_set_nc(pte);
		break;

#if CHIP_HAS_CBOX_HOME_MAP()
	case PAGE_HOME_HASH:
		pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3);
		break;
#endif

	default:
		BUG_ON(home < 0 || home >= NR_CPUS ||
		       !cpu_is_valid_lotar(home));
		pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3);
		pte = set_remote_cache_cpu(pte, home);
		break;
	}

#if CHIP_HAS_NC_AND_NOALLOC_BITS()
	if (noallocl2)
		pte = hv_pte_set_no_alloc_l2(pte);

	/* Simplify "no local and no l3" to "uncached" */
	if (hv_pte_get_no_alloc_l2(pte) && hv_pte_get_no_alloc_l1(pte) &&
	    hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) {
		pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED);
	}
#endif

	/* Checking this case here gives a better panic than from the hv. */
	BUG_ON(hv_pte_get_mode(pte) == 0);

	return pte;
}
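
/*
 * Illustrative use (cpu number is arbitrary, not from the original
 * file): home a kernel PTE's page on the L2 cache of tile 4, or mark
 * it as not cached at all:
 *
 *	pte = pte_set_home(pte, 4);
 *	pte = pte_set_home(pte, PAGE_HOME_UNCACHED);
 */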

/*
 * The routines in this section are the "static" versions of the normal
 * dynamic homecaching routines; they just set the home cache
 * of a kernel page once, and require a full-chip cache/TLB flush,
 * so they're not suitable for anything but infrequent use.
 */

#if CHIP_HAS_CBOX_HOME_MAP()
static inline int initial_page_home(void) { return PAGE_HOME_HASH; }
#else
static inline int initial_page_home(void) { return 0; }
#endif

int page_home(struct page *page)
{
	if (PageHighMem(page)) {
		return initial_page_home();
	} else {
		unsigned long kva = (unsigned long)page_address(page);
		return pte_to_home(*virt_to_pte(NULL, kva));
	}
}

void homecache_change_page_home(struct page *page, int order, int home)
{
	int i, pages = (1 << order);
	unsigned long kva;

	BUG_ON(PageHighMem(page));
	BUG_ON(page_count(page) > 1);
	BUG_ON(page_mapcount(page) != 0);
	kva = (unsigned long) page_address(page);
	flush_remote(0, HV_FLUSH_EVICT_L2, &cpu_cacheable_map,
		     kva, pages * PAGE_SIZE, PAGE_SIZE, cpu_online_mask,
		     NULL, 0);

	for (i = 0; i < pages; ++i, kva += PAGE_SIZE) {
		pte_t *ptep = virt_to_pte(NULL, kva);
		pte_t pteval = *ptep;
		BUG_ON(!pte_present(pteval) || pte_huge(pteval));
		*ptep = pte_set_home(pteval, home);
	}
}

struct page *homecache_alloc_pages(gfp_t gfp_mask,
				   unsigned int order, int home)
{
	struct page *page;
	BUG_ON(gfp_mask & __GFP_HIGHMEM);   /* must be lowmem */
	page = alloc_pages(gfp_mask, order);
	if (page)
		homecache_change_page_home(page, order, home);
	return page;
}
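
/*
 * Illustrative use (not from the original file): allocate one lowmem
 * page homed on the cpu running this code (assumes the caller has
 * disabled preemption or otherwise pinned itself), then free it again:
 *
 *	struct page *page = homecache_alloc_pages(GFP_KERNEL, 0,
 *						   smp_processor_id());
 *	if (page)
 *		homecache_free_pages((unsigned long)page_address(page), 0);
 */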

struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
					unsigned int order, int home)
{
	struct page *page;
	BUG_ON(gfp_mask & __GFP_HIGHMEM);   /* must be lowmem */
	page = alloc_pages_node(nid, gfp_mask, order);
	if (page)
		homecache_change_page_home(page, order, home);
	return page;
}

void homecache_free_pages(unsigned long addr, unsigned int order)
{
	struct page *page;

	if (addr == 0)
		return;

	VM_BUG_ON(!virt_addr_valid((void *)addr));
	page = virt_to_page((void *)addr);
	if (put_page_testzero(page)) {
		int pages = (1 << order);
		homecache_change_page_home(page, order, initial_page_home());
		while (pages--)
			__free_page(page++);
	}
}