/*
 * Handle caching attributes in page tables (PAT)
 *
 * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *          Suresh B Siddha <suresh.b.siddha@intel.com>
 *
 * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
 */

#include <linux/seq_file.h>
#include <linux/bootmem.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/rbtree.h>

#include <asm/cacheflush.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/x86_init.h>
#include <asm/pgtable.h>
#include <asm/fcntl.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/page.h>
#include <asm/msr.h>
#include <asm/pat.h>
#include <asm/io.h>

#include "pat_internal.h"

#ifdef CONFIG_X86_PAT
int __read_mostly pat_enabled = 1;

static inline void pat_disable(const char *reason)
{
        pat_enabled = 0;
        printk(KERN_INFO "%s\n", reason);
}

static int __init nopat(char *str)
{
        pat_disable("PAT support disabled.");
        return 0;
}
early_param("nopat", nopat);
#else
static inline void pat_disable(const char *reason)
{
        (void)reason;
}
#endif
venkatesh.pallipadi@intel.com2e5d9c82008-03-18 17:00:14 -070056
Venki Pallipadi77b52b42008-05-05 19:09:10 -070057
venkatesh.pallipadi@intel.combe5a0c12010-02-10 11:57:06 -080058int pat_debug_enable;
Ingo Molnarad2cde12008-09-30 13:20:45 +020059
Venki Pallipadi77b52b42008-05-05 19:09:10 -070060static int __init pat_debug_setup(char *str)
61{
venkatesh.pallipadi@intel.combe5a0c12010-02-10 11:57:06 -080062 pat_debug_enable = 1;
Venki Pallipadi77b52b42008-05-05 19:09:10 -070063 return 0;
64}
65__setup("debugpat", pat_debug_setup);
66
Thomas Gleixner8d4a4302008-05-08 09:18:43 +020067static u64 __read_mostly boot_pat_state;
venkatesh.pallipadi@intel.com2e5d9c82008-03-18 17:00:14 -070068
enum {
        PAT_UC = 0,             /* uncached */
        PAT_WC = 1,             /* Write combining */
        PAT_WT = 4,             /* Write Through */
        PAT_WP = 5,             /* Write Protected */
        PAT_WB = 6,             /* Write Back (default) */
        PAT_UC_MINUS = 7,       /* UC, but can be overridden by MTRR */
};

#define PAT(x, y)       ((u64)PAT_ ## y << ((x)*8))

void pat_init(void)
{
        u64 pat;
        bool boot_cpu = !boot_pat_state;

        if (!pat_enabled)
                return;

        if (!cpu_has_pat) {
                if (!boot_pat_state) {
                        pat_disable("PAT not supported by CPU.");
                        return;
                } else {
                        /*
                         * If this happens we are on a secondary CPU, but
                         * switched to PAT on the boot CPU. We have no way to
                         * undo PAT.
                         */
                        printk(KERN_ERR "PAT enabled, "
                               "but not supported by secondary CPU\n");
                        BUG();
                }
        }

        /* Set PWT to Write-Combining. All other bits stay the same */
        /*
         * PTE encoding used in Linux:
         *      PAT
         *      |PCD
         *      ||PWT
         *      |||
         *      000 WB          _PAGE_CACHE_WB
         *      001 WC          _PAGE_CACHE_WC
         *      010 UC-         _PAGE_CACHE_UC_MINUS
         *      011 UC          _PAGE_CACHE_UC
         * PAT bit unused
         */
        pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
              PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
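
        /*
         * Illustrative note (added for clarity, not in the original source):
         * with PAT_WB = 6, PAT_WC = 1, PAT_UC_MINUS = 7 and PAT_UC = 0, the
         * value written to the MSR below works out to 0x0007010600070106.
         */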

        /* Boot CPU check */
        if (!boot_pat_state)
                rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);

        wrmsrl(MSR_IA32_CR_PAT, pat);

        if (boot_cpu)
                printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
                       smp_processor_id(), boot_pat_state, pat);
}

#undef PAT

/*
 * The global memtype list keeps track of the memory type for specific
 * physical memory areas. Conflicting memory types in different mappings
 * can cause CPU cache corruption, so we track them to avoid that.
 *
 * The list is sorted by starting address and can contain multiple
 * entries for each address (this allows reference counting for
 * overlapping areas). All the aliases have the same cache attributes,
 * of course. Zero attributes are represented as holes.
 *
 * The data structure is a list that is also organized as an rbtree
 * sorted on the start address of the memtype range.
 *
 * memtype_lock protects both the linear list and the rbtree.
 */

static struct rb_root memtype_rbroot = RB_ROOT;
static LIST_HEAD(memtype_list);
static DEFINE_SPINLOCK(memtype_lock);   /* protects memtype list */

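/*
 * Worked example (illustrative, not from the original source): with
 * entries starting at 0x1000, 0x3000 and 0x5000, a search for 0x3000
 * returns that entry directly, a search for 0x4000 returns the 0x3000
 * entry (the largest start <= 0x4000), and a search for 0x500 returns
 * NULL since no entry starts at or below it.
 */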
static struct memtype *memtype_rb_search(struct rb_root *root, u64 start)
{
        struct rb_node *node = root->rb_node;
        struct memtype *last_lower = NULL;

        while (node) {
                struct memtype *data = container_of(node, struct memtype, rb);

                if (data->start < start) {
                        last_lower = data;
                        node = node->rb_right;
                } else if (data->start > start) {
                        node = node->rb_left;
                } else
                        return data;
        }

        /* Will return NULL if there is no entry with its start <= start */
        return last_lower;
}

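/*
 * Note (added for clarity): entries with an equal start address are
 * linked to the left below, so duplicates for the same start stay
 * adjacent in the tree.
 */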
static void memtype_rb_insert(struct rb_root *root, struct memtype *data)
{
        struct rb_node **new = &(root->rb_node);
        struct rb_node *parent = NULL;

        while (*new) {
                struct memtype *this = container_of(*new, struct memtype, rb);

                parent = *new;
                if (data->start <= this->start)
                        new = &((*new)->rb_left);
                else if (data->start > this->start)
                        new = &((*new)->rb_right);
        }

        rb_link_node(&data->rb, parent, new);
        rb_insert_color(&data->rb, root);
}

/*
 * Intersects the PAT memory type with the MTRR memory type and returns
 * the resulting effective memory type as PAT understands it.
 * (The type values used in PAT and MTRR are not the same.)
 * The intersection is based on the "Effective Memory Type" tables in
 * the IA-32 SDM, vol 3a.
 */
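/*
 * Example (illustrative, not from the original source): a _PAGE_CACHE_WB
 * request over a range that an MTRR marks UC or WC is downgraded to
 * _PAGE_CACHE_UC_MINUS below, so a WB page attribute is never used on
 * memory that the MTRRs do not mark write-back.
 */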
static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
{
        /*
         * Look for an MTRR hint to get the effective type in the case
         * where the PAT request is for WB.
         */
        if (req_type == _PAGE_CACHE_WB) {
                u8 mtrr_type;

                mtrr_type = mtrr_type_lookup(start, end);
                if (mtrr_type != MTRR_TYPE_WRBACK)
                        return _PAGE_CACHE_UC_MINUS;

                return _PAGE_CACHE_WB;
        }

        return req_type;
}

static int
chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
{
        if (new->type != entry->type) {
                if (type) {
                        new->type = entry->type;
                        *type = entry->type;
                } else
                        goto conflict;
        }

        /* check overlaps with more than one entry in the list */
        list_for_each_entry_continue(entry, &memtype_list, nd) {
                if (new->end <= entry->start)
                        break;
                else if (new->type != entry->type)
                        goto conflict;
        }
        return 0;

 conflict:
        printk(KERN_INFO "%s:%d conflicting memory types "
                "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start,
                new->end, cattr_name(new->type), cattr_name(entry->type));
        return -EBUSY;
}

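/*
 * Return value summary (added for clarity): 1 if the whole range is RAM,
 * 0 if none of it is, and -1 if the range mixes RAM and non-RAM pages
 * (callers treat the mixed case as an error).
 */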
static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
{
        int ram_page = 0, not_rampage = 0;
        unsigned long page_nr;

        for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
             ++page_nr) {
                /*
                 * For legacy reasons, the physical address range in the
                 * legacy ISA region is tracked as non-RAM. This allows
                 * users of /dev/mem to map portions of the legacy ISA
                 * region, even when some of those portions are listed
                 * (or not even listed) with different e820 types
                 * (RAM/reserved/..).
                 */
                if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) &&
                    page_is_ram(page_nr))
                        ram_page = 1;
                else
                        not_rampage = 1;

                if (ram_page == not_rampage)
                        return -1;
        }

        return ram_page;
}

/*
 * For RAM pages, we use page flags to mark the pages with the appropriate
 * memory type. Here we make two passes:
 * - Find the memtype of all the pages in the range, looking for any conflicts.
 * - If there are no conflicts, set the new memtype for the pages in the range.
 *
 * Caller must hold memtype_lock for atomicity.
 */
static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
                                  unsigned long *new_type)
{
        struct page *page;
        u64 pfn;

        if (req_type == _PAGE_CACHE_UC) {
                /* We do not support strong UC */
                WARN_ON_ONCE(1);
                req_type = _PAGE_CACHE_UC_MINUS;
        }

        for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
                unsigned long type;

                page = pfn_to_page(pfn);
                type = get_page_memtype(page);
                if (type != -1) {
                        printk(KERN_INFO "reserve_ram_pages_type failed "
                                "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n",
                                start, end, type, req_type);
                        if (new_type)
                                *new_type = type;

                        return -EBUSY;
                }
        }

        if (new_type)
                *new_type = req_type;

        for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
                page = pfn_to_page(pfn);
                set_page_memtype(page, req_type);
        }
        return 0;
}

static int free_ram_pages_type(u64 start, u64 end)
{
        struct page *page;
        u64 pfn;

        for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
                page = pfn_to_page(pfn);
                set_page_memtype(page, -1);
        }
        return 0;
}

static int memtype_check_insert(struct memtype *new, unsigned long *new_type)
{
        struct memtype *entry;
        u64 start, end;
        unsigned long actual_type;
        struct list_head *where;
        int err = 0;

        start = new->start;
        end = new->end;
        actual_type = new->type;

        /* Search for an existing mapping that overlaps the current range */
        where = NULL;
        list_for_each_entry(entry, &memtype_list, nd) {
                if (end <= entry->start) {
                        where = entry->nd.prev;
                        break;
                } else if (start <= entry->start) { /* end > entry->start */
                        err = chk_conflict(new, entry, new_type);
                        if (!err) {
                                dprintk("Overlap at 0x%Lx-0x%Lx\n",
                                        entry->start, entry->end);
                                where = entry->nd.prev;
                        }
                        break;
                } else if (start < entry->end) { /* start > entry->start */
                        err = chk_conflict(new, entry, new_type);
                        if (!err) {
                                dprintk("Overlap at 0x%Lx-0x%Lx\n",
                                        entry->start, entry->end);

                                /*
                                 * Move to the right position in the linked
                                 * list to add this new entry.
                                 */
                                list_for_each_entry_continue(entry,
                                                        &memtype_list, nd) {
                                        if (start <= entry->start) {
                                                where = entry->nd.prev;
                                                break;
                                        }
                                }
                        }
                        break;
                }
        }
        if (!err) {
                if (where)
                        list_add(&new->nd, where);
                else
                        list_add_tail(&new->nd, &memtype_list);

                memtype_rb_insert(&memtype_rbroot, new);
        }
        return err;
}

/*
 * req_type typically has one of:
 * - _PAGE_CACHE_WB
 * - _PAGE_CACHE_WC
 * - _PAGE_CACHE_UC_MINUS
 * - _PAGE_CACHE_UC
 *
 * If new_type is NULL, the function returns an error if it cannot reserve
 * the region with req_type. If new_type is non-NULL, the function returns
 * the available type in new_type when there is no error. On any error it
 * returns a negative value.
 */
int reserve_memtype(u64 start, u64 end, unsigned long req_type,
                    unsigned long *new_type)
{
        struct memtype *new;
        unsigned long actual_type;
        int is_range_ram;
        int err = 0;

        BUG_ON(start >= end); /* end is exclusive */

        if (!pat_enabled) {
                /* This is identical to page table setting without PAT */
                if (new_type) {
                        if (req_type == _PAGE_CACHE_WC)
                                *new_type = _PAGE_CACHE_UC_MINUS;
                        else
                                *new_type = req_type & _PAGE_CACHE_MASK;
                }
                return 0;
        }

        /* Low ISA region is always mapped WB in page table. No need to track */
        if (x86_platform.is_untracked_pat_range(start, end)) {
                if (new_type)
                        *new_type = _PAGE_CACHE_WB;
                return 0;
        }

        /*
         * Call mtrr_lookup to get the type hint. This is an
         * optimization for /dev/mem mmap'ers into WB memory (BIOS
         * tools and ACPI tools). Use WB request for WB memory and use
         * UC_MINUS otherwise.
         */
        actual_type = pat_x_mtrr_type(start, end, req_type & _PAGE_CACHE_MASK);

        if (new_type)
                *new_type = actual_type;

        is_range_ram = pat_pagerange_is_ram(start, end);
        if (is_range_ram == 1) {

                spin_lock(&memtype_lock);
                err = reserve_ram_pages_type(start, end, req_type, new_type);
                spin_unlock(&memtype_lock);

                return err;
        } else if (is_range_ram < 0) {
                return -EINVAL;
        }

        new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
        if (!new)
                return -ENOMEM;

        new->start = start;
        new->end = end;
        new->type = actual_type;

        spin_lock(&memtype_lock);

        err = memtype_check_insert(new, new_type);
        if (err) {
                printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, "
                       "track %s, req %s\n",
                       start, end, cattr_name(new->type), cattr_name(req_type));
                kfree(new);
                spin_unlock(&memtype_lock);

                return err;
        }

        spin_unlock(&memtype_lock);

        dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
                start, end, cattr_name(new->type), cattr_name(req_type),
                new_type ? cattr_name(*new_type) : "-");

        return err;
}
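
/*
 * Typical usage sketch (illustrative, not from the original source):
 *
 *      unsigned long new_type;
 *
 *      if (!reserve_memtype(paddr, paddr + size, _PAGE_CACHE_WC, &new_type)) {
 *              ... map using new_type, which may differ from the request ...
 *              free_memtype(paddr, paddr + size);
 *      }
 *
 * Every successful reserve_memtype() must be paired with a free_memtype()
 * over the same range.
 */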

int free_memtype(u64 start, u64 end)
{
        struct memtype *entry, *saved_entry;
        int err = -EINVAL;
        int is_range_ram;

        if (!pat_enabled)
                return 0;

        /* Low ISA region is always mapped WB. No need to track */
        if (x86_platform.is_untracked_pat_range(start, end))
                return 0;

        is_range_ram = pat_pagerange_is_ram(start, end);
        if (is_range_ram == 1) {

                spin_lock(&memtype_lock);
                err = free_ram_pages_type(start, end);
                spin_unlock(&memtype_lock);

                return err;
        } else if (is_range_ram < 0) {
                return -EINVAL;
        }

        spin_lock(&memtype_lock);

        entry = memtype_rb_search(&memtype_rbroot, start);
        if (unlikely(entry == NULL))
                goto unlock_ret;

        /*
         * The found entry has a start address equal to or less than the
         * one we searched for. Now go through the list in both directions
         * to look for the entry that matches both start and end, relying
         * on the list being sorted by start address.
         */
        saved_entry = entry;
        list_for_each_entry_from(entry, &memtype_list, nd) {
                if (entry->start == start && entry->end == end) {
                        rb_erase(&entry->rb, &memtype_rbroot);
                        list_del(&entry->nd);
                        kfree(entry);
                        err = 0;
                        break;
                } else if (entry->start > start) {
                        break;
                }
        }

        if (!err)
                goto unlock_ret;

        entry = saved_entry;
        list_for_each_entry_reverse(entry, &memtype_list, nd) {
                if (entry->start == start && entry->end == end) {
                        rb_erase(&entry->rb, &memtype_rbroot);
                        list_del(&entry->nd);
                        kfree(entry);
                        err = 0;
                        break;
                } else if (entry->start < start) {
                        break;
                }
        }
unlock_ret:
        spin_unlock(&memtype_lock);

        if (err) {
                printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n",
                        current->comm, current->pid, start, end);
        }

        dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end);

        return err;
}

/**
 * lookup_memtype - Looks up the memory type for a physical address
 * @paddr: physical address whose memory type needs to be looked up
 *
 * Only to be called when PAT is enabled.
 *
 * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or
 * _PAGE_CACHE_UC
 */
static unsigned long lookup_memtype(u64 paddr)
{
        int rettype = _PAGE_CACHE_WB;
        struct memtype *entry;

        if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE))
                return rettype;

        if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
                struct page *page;
                spin_lock(&memtype_lock);
                page = pfn_to_page(paddr >> PAGE_SHIFT);
                rettype = get_page_memtype(page);
                spin_unlock(&memtype_lock);
                /*
                 * -1 from get_page_memtype() implies the RAM page is in
                 * its default state and not reserved, and hence of type WB
                 */
                if (rettype == -1)
                        rettype = _PAGE_CACHE_WB;

                return rettype;
        }

        spin_lock(&memtype_lock);

        entry = memtype_rb_search(&memtype_rbroot, paddr);
        if (entry != NULL)
                rettype = entry->type;
        else
                rettype = _PAGE_CACHE_UC_MINUS;

        spin_unlock(&memtype_lock);
        return rettype;
}

/**
 * io_reserve_memtype - Request a memory type mapping for a region of memory
 * @start: start (physical address) of the region
 * @end: end (physical address) of the region
 * @type: A pointer to memtype, with requested type. On success, requested
 * or any other compatible type that was available for the region is returned
 *
 * On success, returns 0
 * On failure, returns non-zero
 */
int io_reserve_memtype(resource_size_t start, resource_size_t end,
                        unsigned long *type)
{
        resource_size_t size = end - start;
        unsigned long req_type = *type;
        unsigned long new_type;
        int ret;

        WARN_ON_ONCE(iomem_map_sanity_check(start, size));

        ret = reserve_memtype(start, end, req_type, &new_type);
        if (ret)
                goto out_err;

        if (!is_new_memtype_allowed(start, size, req_type, new_type))
                goto out_free;

        if (kernel_map_sync_memtype(start, size, new_type) < 0)
                goto out_free;

        *type = new_type;
        return 0;

out_free:
        free_memtype(start, end);
        ret = -EBUSY;
out_err:
        return ret;
}

/**
 * io_free_memtype - Release a memory type mapping for a region of memory
 * @start: start (physical address) of the region
 * @end: end (physical address) of the region
 */
void io_free_memtype(resource_size_t start, resource_size_t end)
{
        free_memtype(start, end);
}
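
/*
 * Pairing note (added for clarity, not in the original source):
 * io_reserve_memtype() and io_free_memtype() wrap reserve_memtype() and
 * free_memtype() for I/O-mapping callers, so each successful reservation
 * must be released with io_free_memtype() over the same start/end.
 */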

pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
                                unsigned long size, pgprot_t vma_prot)
{
        return vma_prot;
}

#ifdef CONFIG_STRICT_DEVMEM
/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
        return 1;
}
#else
/* This check is needed to avoid cache aliasing when PAT is enabled */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
        u64 from = ((u64)pfn) << PAGE_SHIFT;
        u64 to = from + size;
        u64 cursor = from;

        if (!pat_enabled)
                return 1;

        while (cursor < to) {
                if (!devmem_is_allowed(pfn)) {
                        printk(KERN_INFO
                "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
                                current->comm, from, to);
                        return 0;
                }
                cursor += PAGE_SIZE;
                pfn++;
        }
        return 1;
}
#endif /* CONFIG_STRICT_DEVMEM */

int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
                                unsigned long size, pgprot_t *vma_prot)
{
        unsigned long flags = _PAGE_CACHE_WB;

        if (!range_is_allowed(pfn, size))
                return 0;

        if (file->f_flags & O_DSYNC)
                flags = _PAGE_CACHE_UC_MINUS;

#ifdef CONFIG_X86_32
        /*
         * On the PPro and successors, the MTRRs are used to set
         * memory types for physical addresses outside main memory,
         * so blindly setting UC or PWT on those pages is wrong.
         * For Pentiums and earlier, the surround logic should disable
         * caching for the high addresses through the KEN pin, but
         * we maintain the tradition of paranoia in this code.
         */
        if (!pat_enabled &&
            !(boot_cpu_has(X86_FEATURE_MTRR) ||
              boot_cpu_has(X86_FEATURE_K6_MTRR) ||
              boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
              boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
            (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
                flags = _PAGE_CACHE_UC;
        }
#endif

        *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
                             flags);
        return 1;
}

/*
 * Change the memory type for the physical address range in kernel identity
 * mapping space if that range is a part of the identity map.
 */
int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
{
        unsigned long id_sz;

        if (base >= __pa(high_memory))
                return 0;

        id_sz = (__pa(high_memory) < base + size) ?
                                __pa(high_memory) - base :
                                size;

        if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) {
                printk(KERN_INFO
                        "%s:%d ioremap_change_attr failed %s "
                        "for %Lx-%Lx\n",
                        current->comm, current->pid,
                        cattr_name(flags),
                        base, (unsigned long long)(base + size));
                return -EINVAL;
        }
        return 0;
}

/*
 * Internal interface to reserve a range of physical memory with prot.
 * Reserves non-RAM regions only; after a successful reserve_memtype, this
 * function also keeps the identity mapping (if any) in sync with the new prot.
 */
static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
                                int strict_prot)
{
        int is_ram = 0;
        int ret;
        unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
        unsigned long flags = want_flags;

        is_ram = pat_pagerange_is_ram(paddr, paddr + size);

        /*
         * reserve_pfn_range() for RAM pages. We do not refcount to keep
         * track of the number of mappings of RAM pages. We can assert that
         * the type requested matches the type of the first page in the range.
         */
        if (is_ram) {
                if (!pat_enabled)
                        return 0;

                flags = lookup_memtype(paddr);
                if (want_flags != flags) {
                        printk(KERN_WARNING
                        "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n",
                                current->comm, current->pid,
                                cattr_name(want_flags),
                                (unsigned long long)paddr,
                                (unsigned long long)(paddr + size),
                                cattr_name(flags));
                        *vma_prot = __pgprot((pgprot_val(*vma_prot) &
                                              (~_PAGE_CACHE_MASK)) |
                                             flags);
                }
                return 0;
        }

        ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
        if (ret)
                return ret;

        if (flags != want_flags) {
                if (strict_prot ||
                    !is_new_memtype_allowed(paddr, size, want_flags, flags)) {
                        free_memtype(paddr, paddr + size);
                        printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
                                " for %Lx-%Lx, got %s\n",
                                current->comm, current->pid,
                                cattr_name(want_flags),
                                (unsigned long long)paddr,
                                (unsigned long long)(paddr + size),
                                cattr_name(flags));
                        return -EINVAL;
                }
                /*
                 * We allow returning a different type than the one requested
                 * in the non-strict case.
                 */
                *vma_prot = __pgprot((pgprot_val(*vma_prot) &
                                      (~_PAGE_CACHE_MASK)) |
                                     flags);
        }

        if (kernel_map_sync_memtype(paddr, size, flags) < 0) {
                free_memtype(paddr, paddr + size);
                return -EINVAL;
        }
        return 0;
}

/*
 * Internal interface to free a range of physical memory.
 * Frees non-RAM regions only.
 */
static void free_pfn_range(u64 paddr, unsigned long size)
{
        int is_ram;

        is_ram = pat_pagerange_is_ram(paddr, paddr + size);
        if (is_ram == 0)
                free_memtype(paddr, paddr + size);
}

/*
 * track_pfn_vma_copy is called when a vma that is covering the pfnmap gets
 * copied through copy_page_range().
 *
 * If the vma has a linear pfn mapping for the entire range, we get the prot
 * from the pte and reserve the entire vma range with a single
 * reserve_pfn_range call.
 */
int track_pfn_vma_copy(struct vm_area_struct *vma)
{
        resource_size_t paddr;
        unsigned long prot;
        unsigned long vma_size = vma->vm_end - vma->vm_start;
        pgprot_t pgprot;

        if (is_linear_pfn_mapping(vma)) {
                /*
                 * reserve the whole chunk covered by the vma. We need the
                 * starting address and protection from the pte.
                 */
                if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
                        WARN_ON_ONCE(1);
                        return -EINVAL;
                }
                pgprot = __pgprot(prot);
                return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
        }

        return 0;
}

/*
 * track_pfn_vma_new is called when a _new_ pfn mapping is being established
 * for the physical range indicated by pfn and size.
 *
 * prot is passed in as a parameter for the new mapping. If the vma has a
 * linear pfn mapping for the entire range, reserve the entire vma range
 * with a single reserve_pfn_range call.
 */
int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
                        unsigned long pfn, unsigned long size)
{
        unsigned long flags;
        resource_size_t paddr;
        unsigned long vma_size = vma->vm_end - vma->vm_start;

        if (is_linear_pfn_mapping(vma)) {
                /* reserve the whole chunk starting from vm_pgoff */
                paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
                return reserve_pfn_range(paddr, vma_size, prot, 0);
        }

        if (!pat_enabled)
                return 0;

        /* for vm_insert_pfn and friends, we set prot based on lookup */
        flags = lookup_memtype(pfn << PAGE_SHIFT);
        *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
                         flags);

        return 0;
}

/*
 * untrack_pfn_vma is called while unmapping a pfnmap for a region.
 * untrack can be called for a specific region indicated by pfn and size or
 * can be for the entire vma (in which case size can be zero).
 */
void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
                        unsigned long size)
{
        resource_size_t paddr;
        unsigned long vma_size = vma->vm_end - vma->vm_start;

        if (is_linear_pfn_mapping(vma)) {
                /* free the whole chunk starting from vm_pgoff */
                paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
                free_pfn_range(paddr, vma_size);
                return;
        }
}

pgprot_t pgprot_writecombine(pgprot_t prot)
{
        if (pat_enabled)
                return __pgprot(pgprot_val(prot) | _PAGE_CACHE_WC);
        else
                return pgprot_noncached(prot);
}
EXPORT_SYMBOL_GPL(pgprot_writecombine);
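
/*
 * Example use (illustrative, not from the original source): a driver
 * mapping a write-combining region can do
 *
 *      vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 *
 * before remapping; when PAT is disabled this degrades to an uncached
 * mapping via pgprot_noncached().
 */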

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)

/* get Nth element of the linked list */
static int copy_memtype_nth_element(struct memtype *out, loff_t pos)
{
        struct memtype *list_node;
        int i = 1;

        list_for_each_entry(list_node, &memtype_list, nd) {
                if (pos == i) {
                        *out = *list_node;
                        return 0;
                }
                ++i;
        }
        return 1;
}

static struct memtype *memtype_get_idx(loff_t pos)
{
        struct memtype *print_entry;
        int ret;

        print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL);
        if (!print_entry)
                return NULL;

        spin_lock(&memtype_lock);
        ret = copy_memtype_nth_element(print_entry, pos);
        spin_unlock(&memtype_lock);

        if (!ret) {
                return print_entry;
        } else {
                kfree(print_entry);
                return NULL;
        }
}

static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
{
        if (*pos == 0) {
                ++*pos;
                seq_printf(seq, "PAT memtype list:\n");
        }

        return memtype_get_idx(*pos);
}

static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        ++*pos;
        return memtype_get_idx(*pos);
}

static void memtype_seq_stop(struct seq_file *seq, void *v)
{
}

static int memtype_seq_show(struct seq_file *seq, void *v)
{
        struct memtype *print_entry = (struct memtype *)v;

        seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
                        print_entry->start, print_entry->end);
        kfree(print_entry);

        return 0;
}

static const struct seq_operations memtype_seq_ops = {
        .start = memtype_seq_start,
        .next  = memtype_seq_next,
        .stop  = memtype_seq_stop,
        .show  = memtype_seq_show,
};

static int memtype_seq_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &memtype_seq_ops);
}

static const struct file_operations memtype_fops = {
        .open    = memtype_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};

static int __init pat_memtype_list_init(void)
{
        if (pat_enabled) {
                debugfs_create_file("pat_memtype_list", S_IRUSR,
                                    arch_debugfs_dir, NULL, &memtype_fops);
        }
        return 0;
}

late_initcall(pat_memtype_list_init);

#endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */