/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */

#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>

#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>

static DEFINE_SPINLOCK(cpa_lock);
static struct list_head df_list = LIST_HEAD_INIT(df_list);

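/*
 * Look up the kernel page-table entry for a virtual address.
 * Returns NULL if no entry exists; for a large (2/4MB) mapping the
 * pmd entry itself is returned, cast to pte_t *.
 */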
pte_t *lookup_address(unsigned long address)
{
        pgd_t *pgd = pgd_offset_k(address);
        pud_t *pud;
        pmd_t *pmd;

        if (pgd_none(*pgd))
                return NULL;
        pud = pud_offset(pgd, address);
        if (pud_none(*pud))
                return NULL;
        pmd = pmd_offset(pud, address);
        if (pmd_none(*pmd))
                return NULL;
        if (pmd_large(*pmd))
                return (pte_t *)pmd;

        return pte_offset_kernel(pmd, address);
}

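/*
 * Split one large kernel mapping into a full page of 4KB ptes: the
 * pte covering 'address' gets 'prot', all the others get 'ref_prot'.
 * cpa_lock is dropped around the allocation because a GFP_KERNEL
 * allocation may sleep.
 */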
static struct page *
split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
{
        unsigned long addr;
        struct page *base;
        pte_t *pbase;
        int i;

        spin_unlock_irq(&cpa_lock);
        base = alloc_pages(GFP_KERNEL, 0);
        spin_lock_irq(&cpa_lock);
        if (!base)
                return NULL;

        /*
         * page_private is used to track the number of entries in
         * the page table page that have non standard attributes.
         */
        SetPagePrivate(base);
        page_private(base) = 0;

        address = __pa(address);
        addr = address & LARGE_PAGE_MASK;
        pbase = (pte_t *)page_address(base);
        paravirt_alloc_pt(&init_mm, page_to_pfn(base));

        for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
                set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
                                           addr == address ? prot : ref_prot));
        }
        return base;
}

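/* Flush one page from the CPU caches, one cache line at a time. */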
static void cache_flush_page(struct page *p)
{
        void *addr = page_address(p);
        int i;

        for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
                clflush(addr + i);
}

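/*
 * Flush caches and TLBs on the current CPU; run on every CPU via
 * on_each_cpu() from flush_map().
 */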
static void flush_kernel_map(void *arg)
{
        struct list_head *lh = (struct list_head *)arg;
        struct page *p;

        /* High-level code is not ready for clflush yet */
        if (0 && cpu_has_clflush) {
                list_for_each_entry(p, lh, lru)
                        cache_flush_page(p);
        } else {
                if (boot_cpu_data.x86_model >= 4)
                        wbinvd();
        }

        /*
         * Flush everything to work around errata in early Athlons
         * regarding large-page flushing.
         */
        __flush_tlb_all();
}

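/*
 * Update a kernel pmd/pte entry in init_mm and, when the kernel pmds
 * are not shared (!SHARED_KERNEL_PMD), mirror the change into every
 * pgd on pgd_list.
 */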
static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
        unsigned long flags;
        struct page *page;

        /* change init_mm */
        set_pte_atomic(kpte, pte);
        if (SHARED_KERNEL_PMD)
                return;

        spin_lock_irqsave(&pgd_lock, flags);
        for (page = pgd_list; page; page = (struct page *)page->index) {
                pgd_t *pgd;
                pud_t *pud;
                pmd_t *pmd;

                pgd = (pgd_t *)page_address(page) + pgd_index(address);
                pud = pud_offset(pgd, address);
                pmd = pmd_offset(pud, address);
                set_pte_atomic((pte_t *)pmd, pte);
        }
        spin_unlock_irqrestore(&pgd_lock, flags);
}

/*
 * No more special protections in this 2/4MB area - revert to a large
 * page again.
 */
static inline void revert_page(struct page *kpte_page, unsigned long address)
{
        pgprot_t ref_prot;
        pte_t *linear;

        ref_prot =
        ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
                ? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE;

        linear = (pte_t *)
                pmd_offset(pud_offset(pgd_offset_k(address), address), address);
        set_pmd_pte(linear, address,
                    pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
                            ref_prot));
}

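/*
 * Queue a page-table page for deferred processing in
 * global_flush_tlb(); PG_arch_1 prevents adding the same page twice.
 */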
static inline void save_page(struct page *kpte_page)
{
        if (!test_and_set_bit(PG_arch_1, &kpte_page->flags))
                list_add(&kpte_page->lru, &df_list);
}

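/*
 * Change the attributes of a single page in the kernel linear map,
 * splitting its large page first if necessary. Called with cpa_lock
 * held and interrupts disabled.
 */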
static int __change_page_attr(struct page *page, pgprot_t prot)
{
        struct page *kpte_page;
        unsigned long address;
        pte_t *kpte;

        BUG_ON(PageHighMem(page));
        address = (unsigned long)page_address(page);

        kpte = lookup_address(address);
        if (!kpte)
                return -EINVAL;

        kpte_page = virt_to_page(kpte);
        BUG_ON(PageLRU(kpte_page));
        BUG_ON(PageCompound(kpte_page));

        if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
                if (!pte_huge(*kpte)) {
                        set_pte_atomic(kpte, mk_pte(page, prot));
                } else {
                        struct page *split;
                        pgprot_t ref_prot;

                        ref_prot =
                        ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
                                ? PAGE_KERNEL_EXEC : PAGE_KERNEL;
                        split = split_large_page(address, prot, ref_prot);
                        if (!split)
                                return -ENOMEM;

                        set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
                        kpte_page = split;
                }
                page_private(kpte_page)++;
        } else {
                if (!pte_huge(*kpte)) {
                        set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
                        BUG_ON(page_private(kpte_page) == 0);
                        page_private(kpte_page)--;
                } else
                        BUG();
        }

        /*
         * If the pte was reserved, it means it was created at boot
         * time (not via split_large_page) and in turn we must not
         * replace it with a large page.
         */

        save_page(kpte_page);
        if (!PageReserved(kpte_page)) {
                if (cpu_has_pse && (page_private(kpte_page) == 0)) {
                        paravirt_release_pt(page_to_pfn(kpte_page));
                        revert_page(kpte_page, address);
                }
        }
        return 0;
}

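/* Run flush_kernel_map() on every CPU for the pages on list 'l'. */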
static inline void flush_map(struct list_head *l)
{
        on_each_cpu(flush_kernel_map, l, 1, 1);
}

/*
 * Change the page attributes of a page in the linear mapping.
 *
 * This should be used when a page is mapped with a different caching policy
 * than write-back somewhere - some CPUs do not like it when mappings with
 * different caching policies exist. This changes the page attributes in the
 * kernel linear mapping too.
 *
 * The caller needs to ensure that there are no conflicting mappings elsewhere.
 * This function only deals with the kernel linear map.
 *
 * The caller must call global_flush_tlb() after this.
 */
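/*
 * Typical usage (a sketch, not taken from this file; assumes
 * PAGE_KERNEL_NOCACHE is defined for this configuration):
 *
 *        change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_NOCACHE);
 *        global_flush_tlb();
 */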
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
        unsigned long flags;
        int err = 0, i;

        spin_lock_irqsave(&cpa_lock, flags);
        for (i = 0; i < numpages; i++, page++) {
                err = __change_page_attr(page, prot);
                if (err)
                        break;
        }
        spin_unlock_irqrestore(&cpa_lock, flags);

        return err;
}
EXPORT_SYMBOL(change_page_attr);

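/*
 * Flush caches and TLBs on all CPUs for the pages queued by
 * __change_page_attr(), then free any page-table pages whose large
 * mapping was restored by revert_page().
 */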
void global_flush_tlb(void)
{
        struct page *pg, *next;
        struct list_head l;

        BUG_ON(irqs_disabled());

        spin_lock_irq(&cpa_lock);
        list_replace_init(&df_list, &l);
        spin_unlock_irq(&cpa_lock);
        flush_map(&l);
        list_for_each_entry_safe(pg, next, &l, lru) {
                list_del(&pg->lru);
                clear_bit(PG_arch_1, &pg->flags);
                if (PageReserved(pg) || !cpu_has_pse || page_private(pg) != 0)
                        continue;
                ClearPagePrivate(pg);
                __free_page(pg);
        }
}
EXPORT_SYMBOL(global_flush_tlb);

#ifdef CONFIG_DEBUG_PAGEALLOC
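/*
 * Map (enable != 0) or unmap (enable == 0) pages in the kernel linear
 * map so that accesses to freed pages fault when page-allocation
 * debugging is enabled.
 */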
void kernel_map_pages(struct page *page, int numpages, int enable)
{
        if (PageHighMem(page))
                return;
        if (!enable) {
                debug_check_no_locks_freed(page_address(page),
                                           numpages * PAGE_SIZE);
        }

        /*
         * The return value is ignored - the calls cannot fail,
         * since large pages are disabled at boot time.
         */
        change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));

        /*
         * We should perform an IPI and flush all TLBs,
         * but that can deadlock -> flush only the current CPU.
         */
        __flush_tlb_all();
}
#endif