/*
 * linux/arch/arm/mm/mmu.c
 *
 * Copyright (C) 1995-2005 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mman.h>
#include <linux/nodemask.h>

#include <asm/cputype.h>
#include <asm/mach-types.h>
#include <asm/setup.h>
#include <asm/sizes.h>
#include <asm/tlb.h>

#include <asm/mach/arch.h>
#include <asm/mach/map.h>

#include "mm.h"

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * empty_zero_page is a special page that is used for
 * zero-initialized data and COW.
 */
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);

/*
 * The pmd table for the upper-most set of pages.
 */
pmd_t *top_pmd;

#define CPOLICY_UNCACHED	0
#define CPOLICY_BUFFERED	1
#define CPOLICY_WRITETHROUGH	2
#define CPOLICY_WRITEBACK	3
#define CPOLICY_WRITEALLOC	4

static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;

EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);

struct cachepolicy {
	const char	policy[16];
	unsigned int	cr_mask;
	unsigned int	pmd;
	unsigned int	pte;
};

static struct cachepolicy cache_policies[] __initdata = {
	{
		.policy		= "uncached",
		.cr_mask	= CR_W|CR_C,
		.pmd		= PMD_SECT_UNCACHED,
		.pte		= L_PTE_MT_UNCACHED,
	}, {
		.policy		= "buffered",
		.cr_mask	= CR_C,
		.pmd		= PMD_SECT_BUFFERED,
		.pte		= L_PTE_MT_BUFFERABLE,
	}, {
		.policy		= "writethrough",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WT,
		.pte		= L_PTE_MT_WRITETHROUGH,
	}, {
		.policy		= "writeback",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WB,
		.pte		= L_PTE_MT_WRITEBACK,
	}, {
		.policy		= "writealloc",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WBWA,
		.pte		= L_PTE_MT_WRITEALLOC,
	}
};

/*
 * These are useful for identifying cache coherency
 * problems by allowing the cache or the cache and
 * writebuffer to be turned off.  (Note: the write
 * buffer should not be on with the cache off.)
 */
static void __init early_cachepolicy(char **p)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
		int len = strlen(cache_policies[i].policy);

		if (memcmp(*p, cache_policies[i].policy, len) == 0) {
			cachepolicy = i;
			cr_alignment &= ~cache_policies[i].cr_mask;
			cr_no_alignment &= ~cache_policies[i].cr_mask;
			*p += len;
			break;
		}
	}
	if (i == ARRAY_SIZE(cache_policies))
		printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
	if (cpu_architecture() >= CPU_ARCH_ARMv6) {
		printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
		cachepolicy = CPOLICY_WRITEBACK;
	}
	flush_cache_all();
	set_cr(cr_alignment);
}
__early_param("cachepolicy=", early_cachepolicy);

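/*
 * The deprecated "nocache" and "nowb" options are aliases for the
 * corresponding cachepolicy= settings.
 */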
static void __init early_nocache(char **__unused)
{
	char *p = "buffered";
	printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(&p);
}
__early_param("nocache", early_nocache);

static void __init early_nowrite(char **__unused)
{
	char *p = "uncached";
	printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(&p);
}
__early_param("nowb", early_nowrite);

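/*
 * ecc=on|off controls whether the protection (P) bit is included in
 * the first-level descriptors built below, enabling ECC protection on
 * CPUs that implement it.
 */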
static void __init early_ecc(char **p)
{
	if (memcmp(*p, "on", 2) == 0) {
		ecc_mask = PMD_PROTECTION;
		*p += 2;
	} else if (memcmp(*p, "off", 3) == 0) {
		ecc_mask = 0;
		*p += 3;
	}
}
__early_param("ecc=", early_ecc);

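/*
 * "noalign" disables alignment fault checking by clearing the A bit
 * in both cached copies of the control register value.
 */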
static int __init noalign_setup(char *__unused)
{
	cr_alignment &= ~CR_A;
	cr_no_alignment &= ~CR_A;
	set_cr(cr_alignment);
	return 1;
}
__setup("noalign", noalign_setup);

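/*
 * Adjust selected bits of the CP15 control register, keeping the
 * cached cr_alignment/cr_no_alignment copies in sync.  The alignment
 * bit is never touched here; use "noalign" for that.
 */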
#ifndef CONFIG_SMP
void adjust_cr(unsigned long mask, unsigned long set)
{
	unsigned long flags;

	mask &= ~CR_A;

	set &= mask;

	local_irq_save(flags);

	cr_no_alignment = (cr_no_alignment & ~mask) | set;
	cr_alignment = (cr_alignment & ~mask) | set;

	set_cr((get_cr() & ~mask) | set);

	local_irq_restore(flags);
}
#endif

#define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_WRITE
#define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE

static struct mem_type mem_types[] = {
	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
				  L_PTE_SHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_S,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_CACHED] = {	  /* ioremap_cached */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_WB,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_WC] = {	/* ioremap_wc */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_CACHECLEAN] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MINICLEAN] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_LOW_VECTORS] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_EXEC,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_USER,
	},
	[MT_HIGH_VECTORS] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_USER | L_PTE_EXEC,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_USER,
	},
	[MT_MEMORY] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_ROM] = {
		.prot_sect = PMD_TYPE_SECT,
		.domain    = DOMAIN_KERNEL,
	},
};

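/*
 * Return the memory type descriptor for a mapping type, or NULL if the
 * type is out of range.
 */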
const struct mem_type *get_mem_type(unsigned int type)
{
	return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
}

/*
 * Adjust the PMD section entries according to the CPU in use.
 */
static void __init build_mem_type_table(void)
{
	struct cachepolicy *cp;
	unsigned int cr = get_cr();
	unsigned int user_pgprot, kern_pgprot, vecs_pgprot;
	int cpu_arch = cpu_architecture();
	int i;

	if (cpu_arch < CPU_ARCH_ARMv6) {
#if defined(CONFIG_CPU_DCACHE_DISABLE)
		if (cachepolicy > CPOLICY_BUFFERED)
			cachepolicy = CPOLICY_BUFFERED;
#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
		if (cachepolicy > CPOLICY_WRITETHROUGH)
			cachepolicy = CPOLICY_WRITETHROUGH;
#endif
	}
	if (cpu_arch < CPU_ARCH_ARMv5) {
		if (cachepolicy >= CPOLICY_WRITEALLOC)
			cachepolicy = CPOLICY_WRITEBACK;
		ecc_mask = 0;
	}
#ifdef CONFIG_SMP
	cachepolicy = CPOLICY_WRITEALLOC;
#endif

	/*
	 * Strip out features not present on earlier architectures.
	 * Pre-ARMv5 CPUs don't have TEX bits.  Pre-ARMv6 CPUs or those
	 * without extended page tables don't have the 'Shared' bit.
	 */
	if (cpu_arch < CPU_ARCH_ARMv5)
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_TEX(7);
	if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3())
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_S;

	/*
	 * On ARMv5 and lower, bit 4 must be set in the first-level
	 * descriptors (it was the cache "update-able on write" bit on
	 * ARM610).  However, Xscale and Xscale3 require this bit to be
	 * cleared.
	 */
	if (cpu_is_xscale() || cpu_is_xsc3()) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			mem_types[i].prot_sect &= ~PMD_BIT4;
			mem_types[i].prot_l1 &= ~PMD_BIT4;
		}
	} else if (cpu_arch < CPU_ARCH_ARMv6) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			if (mem_types[i].prot_l1)
				mem_types[i].prot_l1 |= PMD_BIT4;
			if (mem_types[i].prot_sect)
				mem_types[i].prot_sect |= PMD_BIT4;
		}
	}

	/*
	 * Mark the device areas according to the CPU/architecture.
	 */
	if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) {
		if (!cpu_is_xsc3()) {
			/*
			 * Mark device regions on ARMv6+ as execute-never
			 * to prevent speculative instruction fetches.
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;
		}
		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
			/*
			 * For ARMv7 with TEX remapping,
			 * - shared device is SXCB=1100
			 * - nonshared device is SXCB=0100
			 * - write combine device mem is SXCB=0001
			 * (Uncached Normal memory)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
		} else if (cpu_is_xsc3()) {
			/*
			 * For Xscale3,
			 * - shared device is TEXCB=00101
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Inner/Outer Uncacheable in xsc3 parlance)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		} else {
			/*
			 * For ARMv6 and ARMv7 without TEX remapping,
			 * - shared device is TEXCB=00001
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Uncached Normal in ARMv6 parlance).
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		}
	} else {
		/*
		 * On others, write combining is "Uncached/Buffered"
		 */
		mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
	}

	/*
	 * Now deal with the memory-type mappings
	 */
	cp = &cache_policies[cachepolicy];
	vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;

#ifndef CONFIG_SMP
	/*
	 * Only use write-through for non-SMP systems
	 */
	if (cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH)
		vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte;
#endif

	/*
	 * Enable CPU-specific coherency if supported.
	 * (Only available on XSC3 at the moment.)
	 */
	if (arch_is_coherent() && cpu_is_xsc3())
		mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;

	/*
	 * ARMv6 and above have extended page tables.
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
		/*
		 * Mark cache clean areas and XIP ROM read only
		 * from SVC mode and no access from userspace.
		 */
		mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;

#ifdef CONFIG_SMP
		/*
		 * Mark memory with the "shared" attribute for SMP systems
		 */
		user_pgprot |= L_PTE_SHARED;
		kern_pgprot |= L_PTE_SHARED;
		vecs_pgprot |= L_PTE_SHARED;
		mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
#endif
	}

	for (i = 0; i < 16; i++) {
		unsigned long v = pgprot_val(protection_map[i]);
		protection_map[i] = __pgprot(v | user_pgprot);
	}

	mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
	mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;

	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
				 L_PTE_DIRTY | L_PTE_WRITE |
				 L_PTE_EXEC | kern_pgprot);

	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
	mem_types[MT_ROM].prot_sect |= cp->pmd;

	switch (cp->pmd) {
	case PMD_SECT_WT:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
		break;
	case PMD_SECT_WB:
	case PMD_SECT_WBWA:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
		break;
	}
	printk("Memory policy: ECC %sabled, Data cache %s\n",
		ecc_mask ? "en" : "dis", cp->policy);

	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
		struct mem_type *t = &mem_types[i];
		if (t->prot_l1)
			t->prot_l1 |= PMD_DOMAIN(t->domain);
		if (t->prot_sect)
			t->prot_sect |= PMD_DOMAIN(t->domain);
	}
}

#define vectors_base()	(vectors_high() ? 0xffff0000 : 0)

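/*
 * If the PMD entry is empty, allocate a new PTE table from bootmem
 * (hardware and Linux copies) and hook it into the PMD, then write
 * PTEs for the given pfn range using the supplied memory type.
 */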
static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
				  unsigned long end, unsigned long pfn,
				  const struct mem_type *type)
{
	pte_t *pte;

	if (pmd_none(*pmd)) {
		pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t));
		__pmd_populate(pmd, __pa(pte) | type->prot_l1);
	}

	pte = pte_offset_kernel(pmd, addr);
	do {
		set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

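/*
 * Create the mapping for one PGD entry: use section mappings when addr,
 * end and phys are all section-aligned, otherwise fall back to a
 * second-level page table via alloc_init_pte().
 */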
static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
				      unsigned long end, unsigned long phys,
				      const struct mem_type *type)
{
	pmd_t *pmd = pmd_offset(pgd, addr);

	/*
	 * Try a section mapping - end, addr and phys must all be aligned
	 * to a section boundary.  Note that PMDs refer to the individual
	 * L1 entries, whereas PGDs refer to a group of L1 entries making
	 * up one logical pointer to an L2 table.
	 */
	if (((addr | end | phys) & ~SECTION_MASK) == 0) {
		pmd_t *p = pmd;

		if (addr & SECTION_SIZE)
			pmd++;

		do {
			*pmd = __pmd(phys | type->prot_sect);
			phys += SECTION_SIZE;
		} while (pmd++, addr += SECTION_SIZE, addr != end);

		flush_pmd_entry(p);
	} else {
		/*
		 * No need to loop; pte's aren't interested in the
		 * individual L1 entries.
		 */
		alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
	}
}

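/*
 * Create a supersection mapping for a physical address above 4GB
 * (36-bit addressing on ARMv6+ or XSC3).  Supersections require 16MB
 * alignment and must use domain 0.
 */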
static void __init create_36bit_mapping(struct map_desc *md,
					const struct mem_type *type)
{
	unsigned long phys, addr, length, end;
	pgd_t *pgd;

	addr = md->virtual;
	phys = (unsigned long)__pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length);

	if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
		printk(KERN_ERR "MM: CPU does not support supersection "
		       "mapping for 0x%08llx at 0x%08lx\n",
		       __pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	/* N.B.	ARMv6 supersections are only defined to work with domain 0.
	 *	Since domain assignments can in fact be arbitrary, the
	 *	'domain == 0' check below is required to ensure that ARMv6
	 *	supersections are only allocated for domain 0 regardless
	 *	of the actual domain assignments in use.
	 */
	if (type->domain) {
		printk(KERN_ERR "MM: invalid domain in supersection "
		       "mapping for 0x%08llx at 0x%08lx\n",
		       __pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
		printk(KERN_ERR "MM: cannot create mapping for "
		       "0x%08llx at 0x%08lx invalid alignment\n",
		       __pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	/*
	 * Shift bits [35:32] of address into bits [23:20] of PMD
	 * (See ARMv6 spec).
	 */
	phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);

	pgd = pgd_offset_k(addr);
	end = addr + length;
	do {
		pmd_t *pmd = pmd_offset(pgd, addr);
		int i;

		for (i = 0; i < 16; i++)
			*pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER);

		addr += SUPERSECTION_SIZE;
		phys += SUPERSECTION_SIZE;
		pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
	} while (addr != end);
}

/*
 * Create the page directory entries and any necessary
 * page tables for the mapping specified by `md'.  We
 * are able to cope here with varying sizes and address
 * offsets, and we take full advantage of sections and
 * supersections.
 */
void __init create_mapping(struct map_desc *md)
{
	unsigned long phys, addr, length, end;
	const struct mem_type *type;
	pgd_t *pgd;

	if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
		printk(KERN_WARNING "BUG: not creating mapping for "
		       "0x%08llx at 0x%08lx in user region\n",
		       __pfn_to_phys((u64)md->pfn), md->virtual);
		return;
	}

	if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
	    md->virtual >= PAGE_OFFSET && md->virtual < VMALLOC_END) {
		printk(KERN_WARNING "BUG: mapping for 0x%08llx at 0x%08lx "
		       "overlaps vmalloc space\n",
		       __pfn_to_phys((u64)md->pfn), md->virtual);
	}

	type = &mem_types[md->type];

	/*
	 * Catch 36-bit addresses
	 */
	if (md->pfn >= 0x100000) {
		create_36bit_mapping(md, type);
		return;
	}

	addr = md->virtual & PAGE_MASK;
	phys = (unsigned long)__pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));

	if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
		printk(KERN_WARNING "BUG: map for 0x%08lx at 0x%08lx can not "
		       "be mapped using pages, ignoring.\n",
		       __pfn_to_phys(md->pfn), addr);
		return;
	}

	pgd = pgd_offset_k(addr);
	end = addr + length;
	do {
		unsigned long next = pgd_addr_end(addr, end);

		alloc_init_section(pgd, addr, next, phys, type);

		phys += next - addr;
		addr = next;
	} while (pgd++, addr != end);
}

/*
 * Create the architecture specific mappings
 */
void __init iotable_init(struct map_desc *io_desc, int nr)
{
	int i;

	for (i = 0; i < nr; i++)
		create_mapping(io_desc + i);
}

static unsigned long __initdata vmalloc_reserve = SZ_128M;

/*
 * vmalloc=size forces the vmalloc area to be exactly 'size'
 * bytes. This can be used to increase (or decrease) the vmalloc
 * area - the default is 128m.
 */
static void __init early_vmalloc(char **arg)
{
	vmalloc_reserve = memparse(*arg, arg);

	if (vmalloc_reserve < SZ_16M) {
		vmalloc_reserve = SZ_16M;
		printk(KERN_WARNING
			"vmalloc area too small, limiting to %luMB\n",
			vmalloc_reserve >> 20);
	}
}
__early_param("vmalloc=", early_vmalloc);

#define VMALLOC_MIN	(void *)(VMALLOC_END - vmalloc_reserve)

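/*
 * Validate a single memory bank: reject empty banks or banks on an
 * invalid node, drop banks that lie entirely inside the vmalloc area,
 * and truncate banks that run into it.
 */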
static int __init check_membank_valid(struct membank *mb)
{
	/*
	 * Check whether this memory region has non-zero size or
	 * invalid node number.
	 */
	if (mb->size == 0 || mb->node >= MAX_NUMNODES)
		return 0;

	/*
	 * Check whether this memory region would entirely overlap
	 * the vmalloc area.
	 */
	if (phys_to_virt(mb->start) >= VMALLOC_MIN) {
		printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx "
			"(vmalloc region overlap).\n",
			mb->start, mb->start + mb->size - 1);
		return 0;
	}

	/*
	 * Check whether this memory region would partially overlap
	 * the vmalloc area.
	 */
	if (phys_to_virt(mb->start + mb->size) < phys_to_virt(mb->start) ||
	    phys_to_virt(mb->start + mb->size) > VMALLOC_MIN) {
		unsigned long newsize = VMALLOC_MIN - phys_to_virt(mb->start);

		printk(KERN_NOTICE "Truncating RAM at %.8lx-%.8lx "
			"to -%.8lx (vmalloc region overlap).\n",
			mb->start, mb->start + mb->size - 1,
			mb->start + newsize - 1);
		mb->size = newsize;
	}

	return 1;
}

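/*
 * Walk the meminfo bank array and keep only the banks that pass
 * check_membank_valid(), compacting the array in place.
 */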
static void __init sanity_check_meminfo(struct meminfo *mi)
{
	int i, j;

	for (i = 0, j = 0; i < mi->nr_banks; i++) {
		if (check_membank_valid(&mi->bank[i]))
			mi->bank[j++] = mi->bank[i];
	}
	mi->nr_banks = j;
}

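/*
 * Clear out the boot-time mappings we no longer need: everything below
 * PAGE_OFFSET (skipping an XIP kernel mapped in the module area), and
 * the kernel-space mappings from the end of the first memory bank up
 * to the end of the vmalloc region.
 */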
static inline void prepare_page_table(struct meminfo *mi)
{
	unsigned long addr;

	/*
	 * Clear out all the mappings below the kernel image.
	 */
	for (addr = 0; addr < MODULES_VADDR; addr += PGDIR_SIZE)
		pmd_clear(pmd_off_k(addr));

#ifdef CONFIG_XIP_KERNEL
	/* The XIP kernel is mapped in the module area -- skip over it */
	addr = ((unsigned long)&_etext + PGDIR_SIZE - 1) & PGDIR_MASK;
#endif
	for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE)
		pmd_clear(pmd_off_k(addr));

	/*
	 * Clear out all the kernel space mappings, except for the first
	 * memory bank, up to the end of the vmalloc region.
	 */
	for (addr = __phys_to_virt(mi->bank[0].start + mi->bank[0].size);
	     addr < VMALLOC_END; addr += PGDIR_SIZE)
		pmd_clear(pmd_off_k(addr));
}

/*
 * Reserve the various regions of node 0
 */
void __init reserve_node_zero(pg_data_t *pgdat)
{
	unsigned long res_size = 0;

	/*
	 * Register the kernel text and data with bootmem.
	 * Note that this can only be in node 0.
	 */
#ifdef CONFIG_XIP_KERNEL
	reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start,
			BOOTMEM_DEFAULT);
#else
	reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext,
			BOOTMEM_DEFAULT);
#endif

	/*
	 * Reserve the page tables.  These are already in use,
	 * and can only be in node 0.
	 */
	reserve_bootmem_node(pgdat, __pa(swapper_pg_dir),
			     PTRS_PER_PGD * sizeof(pgd_t), BOOTMEM_DEFAULT);

	/*
	 * Hmm... This should go elsewhere, but we really really need to
	 * stop things allocating the low memory; ideally we need a better
	 * implementation of GFP_DMA which does not assume that DMA-able
	 * memory starts at zero.
	 */
	if (machine_is_integrator() || machine_is_cintegrator())
		res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;

	/*
	 * These should likewise go elsewhere.  They pre-reserve the
	 * screen memory region at the start of main system memory.
	 */
	if (machine_is_edb7211())
		res_size = 0x00020000;
	if (machine_is_p720t())
		res_size = 0x00014000;

	/* H1940 and RX3715 need to reserve this for suspend */

	if (machine_is_h1940() || machine_is_rx3715()) {
		reserve_bootmem_node(pgdat, 0x30003000, 0x1000,
				BOOTMEM_DEFAULT);
		reserve_bootmem_node(pgdat, 0x30081000, 0x1000,
				BOOTMEM_DEFAULT);
	}

#ifdef CONFIG_SA1111
	/*
	 * Because of the SA1111 DMA bug, we want to preserve our
	 * precious DMA-able memory...
	 */
	res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
#endif
	if (res_size)
		reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size,
				BOOTMEM_DEFAULT);
}

/*
 * Set up the device mappings.  Since we clear out the page tables for all
 * mappings above VMALLOC_END, we will remove any debug device mappings.
 * This means you have to be careful how you debug this function, or any
 * called function: you can't use any function or debugging method which
 * may touch any device, otherwise the kernel _will_ crash.
 */
static void __init devicemaps_init(struct machine_desc *mdesc)
{
	struct map_desc map;
	unsigned long addr;
	void *vectors;

	/*
	 * Allocate the vector page early.
	 */
	vectors = alloc_bootmem_low_pages(PAGE_SIZE);
	BUG_ON(!vectors);

	for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
		pmd_clear(pmd_off_k(addr));

	/*
	 * Map the kernel if it is XIP.
	 * It is always first in the module area.
	 */
#ifdef CONFIG_XIP_KERNEL
	map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
	map.virtual = MODULES_VADDR;
	map.length = ((unsigned long)&_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
	map.type = MT_ROM;
	create_mapping(&map);
#endif

	/*
	 * Map the cache flushing regions.
	 */
#ifdef FLUSH_BASE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
	map.virtual = FLUSH_BASE;
	map.length = SZ_1M;
	map.type = MT_CACHECLEAN;
	create_mapping(&map);
#endif
#ifdef FLUSH_BASE_MINICACHE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
	map.virtual = FLUSH_BASE_MINICACHE;
	map.length = SZ_1M;
	map.type = MT_MINICLEAN;
	create_mapping(&map);
#endif

	/*
	 * Create a mapping for the machine vectors at the high-vectors
	 * location (0xffff0000).  If we aren't using high-vectors, also
	 * create a mapping at the low-vectors virtual address.
	 */
	map.pfn = __phys_to_pfn(virt_to_phys(vectors));
	map.virtual = 0xffff0000;
	map.length = PAGE_SIZE;
	map.type = MT_HIGH_VECTORS;
	create_mapping(&map);

	if (!vectors_high()) {
		map.virtual = 0;
		map.type = MT_LOW_VECTORS;
		create_mapping(&map);
	}

	/*
	 * Ask the machine support to map in the statically mapped devices.
	 */
	if (mdesc->map_io)
		mdesc->map_io();

	/*
	 * Finally flush the caches and tlb to ensure that we're in a
	 * consistent state wrt the writebuffer.  This also ensures that
	 * any write-allocated cache lines in the vector page are written
	 * back.  After this point, we can start to touch devices again.
	 */
	local_flush_tlb_all();
	flush_cache_all();
}

/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps, and sets up the zero page, bad page and bad page tables.
 */
void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
{
	void *zero_page;

	build_mem_type_table();
	sanity_check_meminfo(mi);
	prepare_page_table(mi);
	bootmem_init(mi);
	devicemaps_init(mdesc);

	top_pmd = pmd_off_k(0xffff0000);

	/*
	 * allocate the zero page.  Note that we count on this going ok.
	 */
	zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
	memzero(zero_page, PAGE_SIZE);
	empty_zero_page = virt_to_page(zero_page);
	flush_dcache_page(empty_zero_page);
}

/*
 * In order to soft-boot, we need to insert a 1:1 mapping in place of
 * the user-mode pages.  This will then ensure that we have predictable
 * results when turning the mmu off
 */
void setup_mm_for_reboot(char mode)
{
	unsigned long base_pmdval;
	pgd_t *pgd;
	int i;

	if (current->mm && current->mm->pgd)
		pgd = current->mm->pgd;
	else
		pgd = init_mm.pgd;

	base_pmdval = PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT;
	if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
		base_pmdval |= PMD_BIT4;

	for (i = 0; i < FIRST_USER_PGD_NR + USER_PTRS_PER_PGD; i++, pgd++) {
		unsigned long pmdval = (i << PGDIR_SHIFT) | base_pmdval;
		pmd_t *pmd;

		pmd = pmd_off(pgd, i << PGDIR_SHIFT);
		pmd[0] = __pmd(pmdval);
		pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1)));
		flush_pmd_entry(pmd);
	}
}