| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 1 | /* | 
|  | 2 | *  arch/s390/mm/pgtable.c | 
|  | 3 | * | 
|  | 4 | *    Copyright IBM Corp. 2007 | 
|  | 5 | *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> | 
|  | 6 | */ | 
|  | 7 |  | 
|  | 8 | #include <linux/sched.h> | 
|  | 9 | #include <linux/kernel.h> | 
|  | 10 | #include <linux/errno.h> | 
|  | 11 | #include <linux/mm.h> | 
|  | 12 | #include <linux/swap.h> | 
|  | 13 | #include <linux/smp.h> | 
|  | 14 | #include <linux/highmem.h> | 
|  | 15 | #include <linux/slab.h> | 
|  | 16 | #include <linux/pagemap.h> | 
|  | 17 | #include <linux/spinlock.h> | 
|  | 18 | #include <linux/module.h> | 
|  | 19 | #include <linux/quicklist.h> | 
|  | 20 |  | 
|  | 21 | #include <asm/system.h> | 
|  | 22 | #include <asm/pgtable.h> | 
|  | 23 | #include <asm/pgalloc.h> | 
|  | 24 | #include <asm/tlb.h> | 
|  | 25 | #include <asm/tlbflush.h> | 
| Martin Schwidefsky | 6252d70 | 2008-02-09 18:24:37 +0100 | [diff] [blame] | 26 | #include <asm/mmu_context.h> | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 27 |  | 
|  | 28 | #ifndef CONFIG_64BIT | 
|  | 29 | #define ALLOC_ORDER	1 | 
| Martin Schwidefsky | 146e4b3 | 2008-02-09 18:24:35 +0100 | [diff] [blame] | 30 | #define TABLES_PER_PAGE	4 | 
|  | 31 | #define FRAG_MASK	15UL | 
|  | 32 | #define SECOND_HALVES	10UL | 
| Carsten Otte | 402b086 | 2008-03-25 18:47:10 +0100 | [diff] [blame] | 33 |  | 
|  | 34 | void clear_table_pgstes(unsigned long *table) | 
|  | 35 | { | 
|  | 36 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); | 
|  | 37 | memset(table + 256, 0, PAGE_SIZE/4); | 
|  | 38 | clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); | 
|  | 39 | memset(table + 768, 0, PAGE_SIZE/4); | 
|  | 40 | } | 
|  | 41 |  | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 42 | #else | 
|  | 43 | #define ALLOC_ORDER	2 | 
| Martin Schwidefsky | 146e4b3 | 2008-02-09 18:24:35 +0100 | [diff] [blame] | 44 | #define TABLES_PER_PAGE	2 | 
|  | 45 | #define FRAG_MASK	3UL | 
|  | 46 | #define SECOND_HALVES	2UL | 
| Carsten Otte | 402b086 | 2008-03-25 18:47:10 +0100 | [diff] [blame] | 47 |  | 
|  | 48 | void clear_table_pgstes(unsigned long *table) | 
|  | 49 | { | 
|  | 50 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); | 
|  | 51 | memset(table + 256, 0, PAGE_SIZE/2); | 
|  | 52 | } | 
|  | 53 |  | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 54 | #endif | 
|  | 55 |  | 
|  | 56 | unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) | 
|  | 57 | { | 
|  | 58 | struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); | 
|  | 59 |  | 
|  | 60 | if (!page) | 
|  | 61 | return NULL; | 
|  | 62 | page->index = 0; | 
|  | 63 | if (noexec) { | 
|  | 64 | struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER); | 
|  | 65 | if (!shadow) { | 
|  | 66 | __free_pages(page, ALLOC_ORDER); | 
|  | 67 | return NULL; | 
|  | 68 | } | 
|  | 69 | page->index = page_to_phys(shadow); | 
|  | 70 | } | 
| Martin Schwidefsky | 146e4b3 | 2008-02-09 18:24:35 +0100 | [diff] [blame] | 71 | spin_lock(&mm->page_table_lock); | 
|  | 72 | list_add(&page->lru, &mm->context.crst_list); | 
|  | 73 | spin_unlock(&mm->page_table_lock); | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 74 | return (unsigned long *) page_to_phys(page); | 
|  | 75 | } | 
|  | 76 |  | 
| Martin Schwidefsky | 146e4b3 | 2008-02-09 18:24:35 +0100 | [diff] [blame] | 77 | void crst_table_free(struct mm_struct *mm, unsigned long *table) | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 78 | { | 
|  | 79 | unsigned long *shadow = get_shadow_table(table); | 
| Martin Schwidefsky | 146e4b3 | 2008-02-09 18:24:35 +0100 | [diff] [blame] | 80 | struct page *page = virt_to_page(table); | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 81 |  | 
| Martin Schwidefsky | 146e4b3 | 2008-02-09 18:24:35 +0100 | [diff] [blame] | 82 | spin_lock(&mm->page_table_lock); | 
|  | 83 | list_del(&page->lru); | 
|  | 84 | spin_unlock(&mm->page_table_lock); | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 85 | if (shadow) | 
|  | 86 | free_pages((unsigned long) shadow, ALLOC_ORDER); | 
|  | 87 | free_pages((unsigned long) table, ALLOC_ORDER); | 
|  | 88 | } | 
|  | 89 |  | 
| Martin Schwidefsky | 6252d70 | 2008-02-09 18:24:37 +0100 | [diff] [blame] | 90 | #ifdef CONFIG_64BIT | 
|  | 91 | int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) | 
|  | 92 | { | 
|  | 93 | unsigned long *table, *pgd; | 
|  | 94 | unsigned long entry; | 
|  | 95 |  | 
|  | 96 | BUG_ON(limit > (1UL << 53)); | 
|  | 97 | repeat: | 
|  | 98 | table = crst_table_alloc(mm, mm->context.noexec); | 
|  | 99 | if (!table) | 
|  | 100 | return -ENOMEM; | 
|  | 101 | spin_lock(&mm->page_table_lock); | 
|  | 102 | if (mm->context.asce_limit < limit) { | 
|  | 103 | pgd = (unsigned long *) mm->pgd; | 
|  | 104 | if (mm->context.asce_limit <= (1UL << 31)) { | 
|  | 105 | entry = _REGION3_ENTRY_EMPTY; | 
|  | 106 | mm->context.asce_limit = 1UL << 42; | 
|  | 107 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | 
|  | 108 | _ASCE_USER_BITS | | 
|  | 109 | _ASCE_TYPE_REGION3; | 
|  | 110 | } else { | 
|  | 111 | entry = _REGION2_ENTRY_EMPTY; | 
|  | 112 | mm->context.asce_limit = 1UL << 53; | 
|  | 113 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | 
|  | 114 | _ASCE_USER_BITS | | 
|  | 115 | _ASCE_TYPE_REGION2; | 
|  | 116 | } | 
|  | 117 | crst_table_init(table, entry); | 
|  | 118 | pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); | 
|  | 119 | mm->pgd = (pgd_t *) table; | 
|  | 120 | table = NULL; | 
|  | 121 | } | 
|  | 122 | spin_unlock(&mm->page_table_lock); | 
|  | 123 | if (table) | 
|  | 124 | crst_table_free(mm, table); | 
|  | 125 | if (mm->context.asce_limit < limit) | 
|  | 126 | goto repeat; | 
|  | 127 | update_mm(mm, current); | 
|  | 128 | return 0; | 
|  | 129 | } | 
|  | 130 |  | 
|  | 131 | void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) | 
|  | 132 | { | 
|  | 133 | pgd_t *pgd; | 
|  | 134 |  | 
|  | 135 | if (mm->context.asce_limit <= limit) | 
|  | 136 | return; | 
|  | 137 | __tlb_flush_mm(mm); | 
|  | 138 | while (mm->context.asce_limit > limit) { | 
|  | 139 | pgd = mm->pgd; | 
|  | 140 | switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { | 
|  | 141 | case _REGION_ENTRY_TYPE_R2: | 
|  | 142 | mm->context.asce_limit = 1UL << 42; | 
|  | 143 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | 
|  | 144 | _ASCE_USER_BITS | | 
|  | 145 | _ASCE_TYPE_REGION3; | 
|  | 146 | break; | 
|  | 147 | case _REGION_ENTRY_TYPE_R3: | 
|  | 148 | mm->context.asce_limit = 1UL << 31; | 
|  | 149 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | 
|  | 150 | _ASCE_USER_BITS | | 
|  | 151 | _ASCE_TYPE_SEGMENT; | 
|  | 152 | break; | 
|  | 153 | default: | 
|  | 154 | BUG(); | 
|  | 155 | } | 
|  | 156 | mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); | 
|  | 157 | crst_table_free(mm, (unsigned long *) pgd); | 
|  | 158 | } | 
|  | 159 | update_mm(mm, current); | 
|  | 160 | } | 
|  | 161 | #endif | 
|  | 162 |  | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 163 | /* | 
|  | 164 | * page table entry allocation/free routines. | 
|  | 165 | */ | 
| Martin Schwidefsky | 146e4b3 | 2008-02-09 18:24:35 +0100 | [diff] [blame] | 166 | unsigned long *page_table_alloc(struct mm_struct *mm) | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 167 | { | 
| Martin Schwidefsky | 146e4b3 | 2008-02-09 18:24:35 +0100 | [diff] [blame] | 168 | struct page *page; | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 169 | unsigned long *table; | 
| Martin Schwidefsky | 146e4b3 | 2008-02-09 18:24:35 +0100 | [diff] [blame] | 170 | unsigned long bits; | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 171 |  | 
| Carsten Otte | 402b086 | 2008-03-25 18:47:10 +0100 | [diff] [blame] | 172 | bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; | 
| Martin Schwidefsky | 146e4b3 | 2008-02-09 18:24:35 +0100 | [diff] [blame] | 173 | spin_lock(&mm->page_table_lock); | 
|  | 174 | page = NULL; | 
|  | 175 | if (!list_empty(&mm->context.pgtable_list)) { | 
|  | 176 | page = list_first_entry(&mm->context.pgtable_list, | 
|  | 177 | struct page, lru); | 
|  | 178 | if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) | 
|  | 179 | page = NULL; | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 180 | } | 
| Martin Schwidefsky | 146e4b3 | 2008-02-09 18:24:35 +0100 | [diff] [blame] | 181 | if (!page) { | 
|  | 182 | spin_unlock(&mm->page_table_lock); | 
|  | 183 | page = alloc_page(GFP_KERNEL|__GFP_REPEAT); | 
|  | 184 | if (!page) | 
|  | 185 | return NULL; | 
|  | 186 | pgtable_page_ctor(page); | 
|  | 187 | page->flags &= ~FRAG_MASK; | 
|  | 188 | table = (unsigned long *) page_to_phys(page); | 
| Carsten Otte | 402b086 | 2008-03-25 18:47:10 +0100 | [diff] [blame] | 189 | if (mm->context.pgstes) | 
|  | 190 | clear_table_pgstes(table); | 
|  | 191 | else | 
|  | 192 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); | 
| Martin Schwidefsky | 146e4b3 | 2008-02-09 18:24:35 +0100 | [diff] [blame] | 193 | spin_lock(&mm->page_table_lock); | 
|  | 194 | list_add(&page->lru, &mm->context.pgtable_list); | 
|  | 195 | } | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 196 | table = (unsigned long *) page_to_phys(page); | 
| Martin Schwidefsky | 146e4b3 | 2008-02-09 18:24:35 +0100 | [diff] [blame] | 197 | while (page->flags & bits) { | 
|  | 198 | table += 256; | 
|  | 199 | bits <<= 1; | 
|  | 200 | } | 
|  | 201 | page->flags |= bits; | 
|  | 202 | if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) | 
|  | 203 | list_move_tail(&page->lru, &mm->context.pgtable_list); | 
|  | 204 | spin_unlock(&mm->page_table_lock); | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 205 | return table; | 
|  | 206 | } | 
|  | 207 |  | 
| Martin Schwidefsky | 146e4b3 | 2008-02-09 18:24:35 +0100 | [diff] [blame] | 208 | void page_table_free(struct mm_struct *mm, unsigned long *table) | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 209 | { | 
| Martin Schwidefsky | 146e4b3 | 2008-02-09 18:24:35 +0100 | [diff] [blame] | 210 | struct page *page; | 
|  | 211 | unsigned long bits; | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 212 |  | 
| Carsten Otte | 402b086 | 2008-03-25 18:47:10 +0100 | [diff] [blame] | 213 | bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; | 
| Martin Schwidefsky | 146e4b3 | 2008-02-09 18:24:35 +0100 | [diff] [blame] | 214 | bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); | 
|  | 215 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | 
|  | 216 | spin_lock(&mm->page_table_lock); | 
|  | 217 | page->flags ^= bits; | 
|  | 218 | if (page->flags & FRAG_MASK) { | 
|  | 219 | /* Page now has some free pgtable fragments. */ | 
|  | 220 | list_move(&page->lru, &mm->context.pgtable_list); | 
|  | 221 | page = NULL; | 
|  | 222 | } else | 
|  | 223 | /* All fragments of the 4K page have been freed. */ | 
|  | 224 | list_del(&page->lru); | 
|  | 225 | spin_unlock(&mm->page_table_lock); | 
|  | 226 | if (page) { | 
|  | 227 | pgtable_page_dtor(page); | 
|  | 228 | __free_page(page); | 
|  | 229 | } | 
|  | 230 | } | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 231 |  | 
| Martin Schwidefsky | 146e4b3 | 2008-02-09 18:24:35 +0100 | [diff] [blame] | 232 | void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) | 
|  | 233 | { | 
|  | 234 | struct page *page; | 
|  | 235 |  | 
|  | 236 | spin_lock(&mm->page_table_lock); | 
|  | 237 | /* Free shadow region and segment tables. */ | 
|  | 238 | list_for_each_entry(page, &mm->context.crst_list, lru) | 
|  | 239 | if (page->index) { | 
|  | 240 | free_pages((unsigned long) page->index, ALLOC_ORDER); | 
|  | 241 | page->index = 0; | 
|  | 242 | } | 
|  | 243 | /* "Free" second halves of page tables. */ | 
|  | 244 | list_for_each_entry(page, &mm->context.pgtable_list, lru) | 
|  | 245 | page->flags &= ~SECOND_HALVES; | 
|  | 246 | spin_unlock(&mm->page_table_lock); | 
|  | 247 | mm->context.noexec = 0; | 
|  | 248 | update_mm(mm, tsk); | 
| Martin Schwidefsky | 3610cce | 2007-10-22 12:52:47 +0200 | [diff] [blame] | 249 | } | 
| Carsten Otte | 402b086 | 2008-03-25 18:47:10 +0100 | [diff] [blame] | 250 |  | 
|  | 251 | /* | 
|  | 252 | * switch on pgstes for its userspace process (for kvm) | 
|  | 253 | */ | 
|  | 254 | int s390_enable_sie(void) | 
|  | 255 | { | 
|  | 256 | struct task_struct *tsk = current; | 
| Christian Borntraeger | 74b6b52 | 2008-05-21 13:37:29 +0200 | [diff] [blame] | 257 | struct mm_struct *mm, *old_mm; | 
| Carsten Otte | 402b086 | 2008-03-25 18:47:10 +0100 | [diff] [blame] | 258 |  | 
| Christian Borntraeger | 74b6b52 | 2008-05-21 13:37:29 +0200 | [diff] [blame] | 259 | /* Do we have pgstes? if yes, we are done */ | 
| Carsten Otte | 402b086 | 2008-03-25 18:47:10 +0100 | [diff] [blame] | 260 | if (tsk->mm->context.pgstes) | 
| Christian Borntraeger | 74b6b52 | 2008-05-21 13:37:29 +0200 | [diff] [blame] | 261 | return 0; | 
| Carsten Otte | 402b086 | 2008-03-25 18:47:10 +0100 | [diff] [blame] | 262 |  | 
| Christian Borntraeger | 74b6b52 | 2008-05-21 13:37:29 +0200 | [diff] [blame] | 263 | /* lets check if we are allowed to replace the mm */ | 
|  | 264 | task_lock(tsk); | 
| Carsten Otte | 402b086 | 2008-03-25 18:47:10 +0100 | [diff] [blame] | 265 | if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || | 
| Christian Borntraeger | 74b6b52 | 2008-05-21 13:37:29 +0200 | [diff] [blame] | 266 | tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) { | 
|  | 267 | task_unlock(tsk); | 
|  | 268 | return -EINVAL; | 
|  | 269 | } | 
|  | 270 | task_unlock(tsk); | 
| Carsten Otte | 402b086 | 2008-03-25 18:47:10 +0100 | [diff] [blame] | 271 |  | 
| Christian Borntraeger | 74b6b52 | 2008-05-21 13:37:29 +0200 | [diff] [blame] | 272 | /* we copy the mm with pgstes enabled */ | 
|  | 273 | tsk->mm->context.pgstes = 1; | 
| Carsten Otte | 402b086 | 2008-03-25 18:47:10 +0100 | [diff] [blame] | 274 | mm = dup_mm(tsk); | 
|  | 275 | tsk->mm->context.pgstes = 0; | 
| Carsten Otte | 402b086 | 2008-03-25 18:47:10 +0100 | [diff] [blame] | 276 | if (!mm) | 
| Christian Borntraeger | 74b6b52 | 2008-05-21 13:37:29 +0200 | [diff] [blame] | 277 | return -ENOMEM; | 
|  | 278 |  | 
|  | 279 | /* Now lets check again if somebody attached ptrace etc */ | 
|  | 280 | task_lock(tsk); | 
|  | 281 | if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || | 
|  | 282 | tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) { | 
|  | 283 | mmput(mm); | 
|  | 284 | task_unlock(tsk); | 
|  | 285 | return -EINVAL; | 
|  | 286 | } | 
|  | 287 |  | 
|  | 288 | /* ok, we are alone. No ptrace, no threads, etc. */ | 
|  | 289 | old_mm = tsk->mm; | 
| Carsten Otte | 402b086 | 2008-03-25 18:47:10 +0100 | [diff] [blame] | 290 | tsk->mm = tsk->active_mm = mm; | 
|  | 291 | preempt_disable(); | 
|  | 292 | update_mm(mm, tsk); | 
|  | 293 | cpu_set(smp_processor_id(), mm->cpu_vm_mask); | 
|  | 294 | preempt_enable(); | 
| Carsten Otte | 402b086 | 2008-03-25 18:47:10 +0100 | [diff] [blame] | 295 | task_unlock(tsk); | 
| Christian Borntraeger | 74b6b52 | 2008-05-21 13:37:29 +0200 | [diff] [blame] | 296 | mmput(old_mm); | 
|  | 297 | return 0; | 
| Carsten Otte | 402b086 | 2008-03-25 18:47:10 +0100 | [diff] [blame] | 298 | } | 
|  | 299 | EXPORT_SYMBOL_GPL(s390_enable_sie); |