/*
 *  linux/arch/i386/mm/pgtable.c
 */

#include <linux/config.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

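/*
 * Dump a summary of memory usage to the kernel log: free areas, free swap,
 * then a walk over every node's pages counting total, highmem, reserved,
 * swap-cached and shared pages, followed by the global page_state counters.
 */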
void show_mem(void)
{
	int total = 0, reserved = 0;
	int shared = 0, cached = 0;
	int highmem = 0;
	struct page *page;
	pg_data_t *pgdat;
	unsigned long i;
	struct page_state ps;

	printk(KERN_INFO "Mem-info:\n");
	show_free_areas();
	printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
	for_each_pgdat(pgdat) {
		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
			page = pgdat_page_nr(pgdat, i);
			total++;
			if (PageHighMem(page))
				highmem++;
			if (PageReserved(page))
				reserved++;
			else if (PageSwapCache(page))
				cached++;
			else if (page_count(page))
				shared += page_count(page) - 1;
		}
	}
	printk(KERN_INFO "%d pages of RAM\n", total);
	printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
	printk(KERN_INFO "%d reserved pages\n", reserved);
	printk(KERN_INFO "%d pages shared\n", shared);
	printk(KERN_INFO "%d pages swap cached\n", cached);

	get_page_state(&ps);
	printk(KERN_INFO "%lu pages dirty\n", ps.nr_dirty);
	printk(KERN_INFO "%lu pages writeback\n", ps.nr_writeback);
	printk(KERN_INFO "%lu pages mapped\n", ps.nr_mapped);
	printk(KERN_INFO "%lu pages slab\n", ps.nr_slab);
	printk(KERN_INFO "%lu pages pagetables\n", ps.nr_page_table_pages);
}

/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */
static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = swapper_pg_dir + pgd_index(vaddr);
	if (pgd_none(*pgd)) {
		BUG();
		return;
	}
	pud = pud_offset(pgd, vaddr);
	if (pud_none(*pud)) {
		BUG();
		return;
	}
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd)) {
		BUG();
		return;
	}
	pte = pte_offset_kernel(pmd, vaddr);
	/* <pfn,flags> stored as-is, to permit clearing entries */
	set_pte(pte, pfn_pte(pfn, flags));

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

/*
 * Associate a large virtual page frame with a given physical page frame
 * and protection flags for that frame. pfn is for the base of the page,
 * vaddr is what the page gets mapped to - both must be properly aligned.
 * The pmd must already be instantiated. Assumes PAE mode.
 */
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	if (vaddr & (PMD_SIZE-1)) {		/* vaddr is misaligned */
		printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n");
		return; /* BUG(); */
	}
	if (pfn & (PTRS_PER_PTE-1)) {		/* pfn is misaligned */
		printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n");
		return; /* BUG(); */
	}
	pgd = swapper_pg_dir + pgd_index(vaddr);
	if (pgd_none(*pgd)) {
		printk(KERN_WARNING "set_pmd_pfn: pgd_none\n");
		return; /* BUG(); */
	}
	pud = pud_offset(pgd, vaddr);
	pmd = pmd_offset(pud, vaddr);
	set_pmd(pmd, pfn_pmd(pfn, flags));
	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

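/*
 * Install the kernel mapping for one fixmap slot: translate the
 * compile-time index to its fixed virtual address and wire it to the
 * given physical address.  Normally reached through the set_fixmap()
 * and set_fixmap_nocache() helpers in <asm/fixmap.h>.
 */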
void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
}

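/*
 * Allocate a zeroed page for a kernel page table.  Kernel page tables
 * live in lowmem so they can always be reached through the direct
 * mapping.
 */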
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
	return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
}

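/*
 * Allocate a zeroed page-table page for user mappings.  With
 * CONFIG_HIGHPTE the page may sit in highmem, in which case it has to
 * be mapped (kmap_atomic() via pte_offset_map()) before its entries
 * can be touched.
 */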
struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
	struct page *pte;

#ifdef CONFIG_HIGHPTE
	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
#else
	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
#endif
	return pte;
}

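/* Slab constructor for pmd_cache: a freshly allocated pmd starts out empty. */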
void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
{
	memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
}

/*
 * List of all pgd's needed for non-PAE so it can invalidate entries
 * in both cached and uncached pgd's; not needed for PAE since the
 * kernel pmd is shared. If PAE were not to share the pmd a similar
 * tactic would be needed. This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 * The locking scheme was chosen on the basis of manfred's
 * recommendations and having no core impact whatsoever.
 * -- wli
 */
DEFINE_SPINLOCK(pgd_lock);
struct page *pgd_list;

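/*
 * The list is threaded through struct page: page->index holds the next
 * page in the list, and page->private points back at whatever references
 * this page (the pgd_list head or the previous page's index field), so
 * deletion needs no search from the head.
 */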
static inline void pgd_list_add(pgd_t *pgd)
{
	struct page *page = virt_to_page(pgd);
	page->index = (unsigned long)pgd_list;
	if (pgd_list)
		pgd_list->private = (unsigned long)&page->index;
	pgd_list = page;
	page->private = (unsigned long)&pgd_list;
}

static inline void pgd_list_del(pgd_t *pgd)
{
	struct page *next, **pprev, *page = virt_to_page(pgd);
	next = (struct page *)page->index;
	pprev = (struct page **)page->private;
	*pprev = next;
	if (next)
		next->private = (unsigned long)pprev;
}

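/*
 * Slab constructor for pgd_cache: copy the kernel part of the page
 * directory from swapper_pg_dir.  In the non-PAE case the copy and the
 * pgd_list insertion are done under pgd_lock so pageattr.c can update
 * every cached pgd; with PAE the kernel pmd is shared, so neither the
 * list nor the lock is needed.
 */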
void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
{
	unsigned long flags;

	if (PTRS_PER_PMD == 1)
		spin_lock_irqsave(&pgd_lock, flags);

	memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
			swapper_pg_dir + USER_PTRS_PER_PGD,
			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));

	if (PTRS_PER_PMD > 1)
		return;

	pgd_list_add(pgd);
	spin_unlock_irqrestore(&pgd_lock, flags);
	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
}

/* never called when PTRS_PER_PMD > 1 */
void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
{
	unsigned long flags; /* can be called from interrupt context */

	spin_lock_irqsave(&pgd_lock, flags);
	pgd_list_del(pgd);
	spin_unlock_irqrestore(&pgd_lock, flags);
}

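/*
 * Allocate a page directory.  The kernel part was already set up by
 * pgd_ctor() when the slab object was constructed.  For PAE we
 * additionally allocate one pmd per user pgd slot and install it with
 * the low bit set (_PAGE_PRESENT), which is what the "+ 1" below
 * encodes.
 */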
pgd_t *pgd_alloc(struct mm_struct *mm)
{
	int i;
	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);

	if (PTRS_PER_PMD == 1 || !pgd)
		return pgd;

	for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
		pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
		if (!pmd)
			goto out_oom;
		set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
	}
	return pgd;

out_oom:
	for (i--; i >= 0; i--)
		kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
	kmem_cache_free(pgd_cache, pgd);
	return NULL;
}

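/*
 * Free a page directory.  Under PAE the user-range pmds allocated in
 * pgd_alloc() are released first (pgd_val() - 1 strips _PAGE_PRESENT
 * before converting back to a virtual address).  The pgd page itself
 * returns to the slab cache; for non-PAE, pgd_dtor() unlinks it from
 * pgd_list when the slab object is eventually destroyed.
 */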
void pgd_free(pgd_t *pgd)
{
	int i;

	/* in the PAE case user pgd entries are overwritten before usage */
	if (PTRS_PER_PMD > 1)
		for (i = 0; i < USER_PTRS_PER_PGD; ++i)
			kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
	/* in the non-PAE case, free_pgtables() clears user pgd entries */
	kmem_cache_free(pgd_cache, pgd);
}