/*
 * Page fault handler for SH with an MMU.
 *
 * Copyright (C) 1999 Niibe Yutaka
 * Copyright (C) 2003 - 2009 Paul Mundt
 *
 * Based on linux/arch/i386/mm/fault.c:
 * Copyright (C) 1995 Linus Torvalds
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/perf_counter.h>
#include <asm/io_trapped.h>
#include <asm/system.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>

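/*
 * Let kprobes have first crack at kernel-mode faults: if a probe is
 * active and its fault handler claims the fault, the caller must
 * return immediately without touching any mm state.
 */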
static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
        int ret = 0;

        if (kprobes_built_in() && !user_mode(regs)) {
                preempt_disable();
                if (kprobe_running() && kprobe_fault_handler(regs, trap))
                        ret = 1;
                preempt_enable();
        }

        return ret;
}

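/*
 * Sync one pmd slot of the faulting page table with the kernel
 * 'reference' page table (init_mm.pgd): copy the missing pmd entry
 * over and return the kernel pmd so the caller can check the pte.
 * Returns NULL when the reference tables have no mapping either.
 */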
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
        unsigned index = pgd_index(address);
        pgd_t *pgd_k;
        pud_t *pud, *pud_k;
        pmd_t *pmd, *pmd_k;

        pgd += index;
        pgd_k = init_mm.pgd + index;

        if (!pgd_present(*pgd_k))
                return NULL;

        pud = pud_offset(pgd, address);
        pud_k = pud_offset(pgd_k, address);
        if (!pud_present(*pud_k))
                return NULL;

        pmd = pmd_offset(pud, address);
        pmd_k = pmd_offset(pud_k, address);
        if (!pmd_present(*pmd_k))
                return NULL;

        if (!pmd_present(*pmd))
                set_pmd(pmd, *pmd_k);
        else
                BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));

        return pmd_k;
}

/*
 * Handle a fault on the vmalloc or module mapping area
 */
static noinline int vmalloc_fault(unsigned long address)
{
        pgd_t *pgd_k;
        pmd_t *pmd_k;
        pte_t *pte_k;

        /* Make sure we are in vmalloc area: */
        if (!(address >= VMALLOC_START && address < VMALLOC_END))
                return -1;

        /*
         * Synchronize this task's top level page-table
         * with the 'reference' page table.
         *
         * Do _not_ use "current" here. We might be inside
         * an interrupt in the middle of a task switch..
         */
        pgd_k = get_TTB();
        pmd_k = vmalloc_sync_one(__va((unsigned long)pgd_k), address);
        if (!pmd_k)
                return -1;

        pte_k = pte_offset_kernel(pmd_k, address);
        if (!pte_present(*pte_k))
                return -1;

        return 0;
}

static int fault_in_kernel_space(unsigned long address)
{
        return address >= TASK_SIZE;
}

/*
 * This routine handles page faults. It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
                                        unsigned long writeaccess,
                                        unsigned long address)
{
        unsigned long vec;
        struct task_struct *tsk;
        struct mm_struct *mm;
        struct vm_area_struct *vma;
        int si_code;
        int fault;
        siginfo_t info;

        tsk = current;
        mm = tsk->mm;
        si_code = SEGV_MAPERR;
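        /*
         * The exception vector encodes the fault type (TLB miss vs.
         * protection violation, read vs. write); it doubles as the
         * trap number handed to the kprobes notifier below.
         */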
        vec = lookup_exception_vector();

        /*
         * We fault-in kernel-space virtual memory on-demand. The
         * 'reference' page table is init_mm.pgd.
         *
         * NOTE! We MUST NOT take any locks for this case. We may
         * be in an interrupt or a critical region, and should
         * only copy the information from the master page table,
         * nothing more.
         */
        if (unlikely(fault_in_kernel_space(address))) {
                if (vmalloc_fault(address) >= 0)
                        return;
                if (notify_page_fault(regs, vec))
                        return;

                goto bad_area_nosemaphore;
        }

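        /*
         * A fault on a user address may still have been taken in
         * kernel mode (e.g. a faulting copy_to_user()), so kprobes
         * gets a look at it here as well.
         */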
        if (unlikely(notify_page_fault(regs, vec)))
                return;

        /* Only enable interrupts if they were on before the fault */
        if ((regs->sr & SR_IMASK) != SR_IMASK)
                local_irq_enable();

        perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);

        /*
         * If we're in an interrupt, have no user context or are running
         * in an atomic region then we must not take the fault:
         */
        if (in_atomic() || !mm)
                goto no_context;

        down_read(&mm->mmap_sem);

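        /*
         * Find the vma covering the address. An address just below a
         * VM_GROWSDOWN vma is a stack access, which grows the vma
         * downwards to cover it.
         */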
        vma = find_vma(mm, address);
        if (!vma)
                goto bad_area;
        if (vma->vm_start <= address)
                goto good_area;
        if (!(vma->vm_flags & VM_GROWSDOWN))
                goto bad_area;
        if (expand_stack(vma, address))
                goto bad_area;

        /*
         * Ok, we have a good vm_area for this memory access, so
         * we can handle it..
         */
good_area:
        si_code = SEGV_ACCERR;
        if (writeaccess) {
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
        } else {
                if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
                        goto bad_area;
        }

        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
survive:
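        /*
         * Hand the fault to the generic VM; FAULT_FLAG_WRITE tells it
         * this was a write so it can break COW and mark the page
         * dirty, rather than just mapping it for read.
         */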
        fault = handle_mm_fault(mm, vma, address,
                                writeaccess ? FAULT_FLAG_WRITE : 0);
        if (unlikely(fault & VM_FAULT_ERROR)) {
                if (fault & VM_FAULT_OOM)
                        goto out_of_memory;
                else if (fault & VM_FAULT_SIGBUS)
                        goto do_sigbus;
                BUG();
        }
        if (fault & VM_FAULT_MAJOR) {
                tsk->maj_flt++;
                perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
                                     regs, address);
        } else {
                tsk->min_flt++;
                perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
                                     regs, address);
        }

        up_read(&mm->mmap_sem);
        return;

        /*
         * Something tried to access memory that isn't in our memory map..
         * Fix it, but check if it's kernel or user first..
         */
bad_area:
        up_read(&mm->mmap_sem);

bad_area_nosemaphore:
        if (user_mode(regs)) {
                info.si_signo = SIGSEGV;
                info.si_errno = 0;
                info.si_code = si_code;
                info.si_addr = (void *)address;
                force_sig_info(SIGSEGV, &info, tsk);
                return;
        }

no_context:
        /* Are we prepared to handle this kernel fault? */
        if (fixup_exception(regs))
                return;

        if (handle_trapped_io(regs, address))
                return;

        /*
         * Oops. The kernel tried to access some bad page. We'll have to
         * terminate things with extreme prejudice.
         */
        bust_spinlocks(1);

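        /*
         * Dump what state we can: the faulting address, the PC, and a
         * manual walk of the two-level page table rooted at the TTB
         * register (*pde, then *pte if the pde is present).
         */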
        if (oops_may_print()) {
                unsigned long page;

                if (address < PAGE_SIZE)
                        printk(KERN_ALERT "Unable to handle kernel NULL "
                                          "pointer dereference");
                else
                        printk(KERN_ALERT "Unable to handle kernel paging "
                                          "request");
                printk(" at virtual address %08lx\n", address);
                printk(KERN_ALERT "pc = %08lx\n", regs->pc);
                page = (unsigned long)get_TTB();
                if (page) {
                        page = ((__typeof__(page) *)page)[address >> PGDIR_SHIFT];
                        printk(KERN_ALERT "*pde = %08lx\n", page);
                        if (page & _PAGE_PRESENT) {
                                page &= PAGE_MASK;
                                address &= 0x003ff000;
                                page = ((__typeof__(page) *)
                                        __va(page))[address >> PAGE_SHIFT];
                                printk(KERN_ALERT "*pte = %08lx\n", page);
                        }
                }
        }

        die("Oops", regs, writeaccess);
        bust_spinlocks(0);
        do_exit(SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
        up_read(&mm->mmap_sem);
        if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
        }
        printk("VM: killing process %s\n", tsk->comm);
        if (user_mode(regs))
                do_group_exit(SIGKILL);
        goto no_context;

do_sigbus:
        up_read(&mm->mmap_sem);

        /*
         * Send a sigbus, regardless of whether we were in kernel
         * or user mode.
         */
        info.si_signo = SIGBUS;
        info.si_errno = 0;
        info.si_code = BUS_ADRERR;
        info.si_addr = (void *)address;
        force_sig_info(SIGBUS, &info, tsk);

        /* Kernel mode? Handle exceptions or die */
        if (!user_mode(regs))
                goto no_context;
}

/*
 * Fast-path TLB refill: called with interrupts disabled from the
 * low-level TLB-miss entry code. Returns 0 once a valid PTE has been
 * loaded into the TLB, non-zero when the caller should fall back to
 * the full do_page_fault() slow path above.
 */
asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
                                         unsigned long writeaccess,
                                         unsigned long address)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        pte_t entry;
        int ret = 1;

        /*
         * We don't take page faults for P1, P2, and parts of P4, these
         * are always mapped, whether it be due to legacy behaviour in
         * 29-bit mode, or due to PMB configuration in 32-bit mode.
         */
        if (address >= P3SEG && address < P3_ADDR_MAX) {
                pgd = pgd_offset_k(address);
        } else {
                if (unlikely(address >= TASK_SIZE || !current->mm))
                        goto out;

                pgd = pgd_offset(current->mm, address);
        }

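        /*
         * Walk the page tables. Bail out to the slow path if any
         * level is missing, or on a write to a read-only PTE; those
         * are the cases only do_page_fault() can resolve.
         */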
        pud = pud_offset(pgd, address);
        if (pud_none_or_clear_bad(pud))
                goto out;
        pmd = pmd_offset(pud, address);
        if (pmd_none_or_clear_bad(pmd))
                goto out;
        pte = pte_offset_kernel(pmd, address);
        entry = *pte;
        if (unlikely(pte_none(entry) || pte_not_present(entry)))
                goto out;
        if (unlikely(writeaccess && !pte_write(entry)))
                goto out;

        if (writeaccess)
                entry = pte_mkdirty(entry);
        entry = pte_mkyoung(entry);

#if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP)
        /*
         * ITLB is not affected by "ldtlb" instruction.
         * So, we need to flush the entry by ourselves.
         */
        local_flush_tlb_one(get_asid(), address & PAGE_MASK);
#endif

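        /* Write back the young/dirty PTE; update_mmu_cache() then primes
         * the TLB with it. */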
        set_pte(pte, entry);
        update_mmu_cache(NULL, address, entry);

        ret = 0;
out:
        return ret;
}