/**
 * kmemcheck - a heavyweight memory checker for the linux kernel
 * Copyright (C) 2007, 2008 Vegard Nossum <vegardno@ifi.uio.no>
 * (With a lot of help from Ingo Molnar and Pekka Enberg.)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2) as
 * published by the Free Software Foundation.
 */

#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/types.h>

#include <asm/cacheflush.h>
#include <asm/kmemcheck.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "error.h"
#include "opcode.h"
#include "pte.h"
#include "shadow.h"

#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
# define KMEMCHECK_ENABLED 0
#endif

#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
# define KMEMCHECK_ENABLED 1
#endif

#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
# define KMEMCHECK_ENABLED 2
#endif

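/*
 * Runtime mode: 0 = disabled, 1 = enabled, 2 = one-shot, i.e. disable
 * again after the first error has been reported (see the handling of
 * kmemcheck_enabled == 2 below). The default comes from Kconfig and can
 * be overridden with the kmemcheck= boot parameter.
 */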
int kmemcheck_enabled = KMEMCHECK_ENABLED;

int __init kmemcheck_init(void)
{
	printk(KERN_INFO "kmemcheck: Initialized\n");

#ifdef CONFIG_SMP
	/*
	 * Limit SMP to use a single CPU. We rely on the fact that this code
	 * runs before SMP is set up.
	 */
	if (setup_max_cpus > 1) {
		printk(KERN_INFO
			"kmemcheck: Limiting number of CPUs to 1.\n");
		setup_max_cpus = 1;
	}
#endif

	return 0;
}

early_initcall(kmemcheck_init);

69#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
Pekka Enberg0c33cac2009-02-26 11:17:31 +020070# define KMEMCHECK_ENABLED 0
Vegard Nossumdfec0722008-04-04 00:51:41 +020071#endif
72
73#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
Pekka Enberg0c33cac2009-02-26 11:17:31 +020074# define KMEMCHECK_ENABLED 1
Vegard Nossumdfec0722008-04-04 00:51:41 +020075#endif
76
77#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
Pekka Enberg0c33cac2009-02-26 11:17:31 +020078# define KMEMCHECK_ENABLED 2
Vegard Nossumdfec0722008-04-04 00:51:41 +020079#endif
80
Pekka Enberg0c33cac2009-02-26 11:17:31 +020081int kmemcheck_enabled = KMEMCHECK_ENABLED;
82
/*
 * We need to parse the kmemcheck= option before any memory is allocated.
 */
static int __init param_kmemcheck(char *str)
{
	if (!str)
		return -EINVAL;

	sscanf(str, "%d", &kmemcheck_enabled);
	return 0;
}

early_param("kmemcheck", param_kmemcheck);

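/*
 * Tracked pages are mapped with _PAGE_PRESENT cleared, so every access
 * to them faults. The pair below temporarily flips that bit for a single
 * address: "show" makes the page present so the faulting instruction can
 * execute, and "hide" takes it away again. Both return 1 if the address
 * belonged to kmemcheck, 0 otherwise.
 */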
int kmemcheck_show_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}

int kmemcheck_hide_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}

struct kmemcheck_context {
	bool busy;
	int balance;

	/*
	 * There can be at most two memory operands to an instruction, but
	 * each address can cross a page boundary -- so we may need up to
	 * four addresses that must be hidden/revealed for each fault.
	 */
	unsigned long addr[4];
	unsigned long n_addrs;
	unsigned long flags;

	/* Data size of the instruction that caused a fault. */
	unsigned int size;
};

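/*
 * Per-CPU bookkeeping for the window between the page fault (#PF) and
 * the following debug trap (#DB). Note that kmemcheck_init() limits the
 * machine to a single CPU anyway.
 */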
static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);

bool kmemcheck_active(struct pt_regs *regs)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	return data->balance > 0;
}

/* Save an address that needs to be shown/hidden */
static void kmemcheck_save_addr(unsigned long addr)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
	data->addr[data->n_addrs++] = addr;
}

static unsigned int kmemcheck_show_all(void)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_show_addr(data->addr[i]);

	return n;
}

static unsigned int kmemcheck_hide_all(void)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_hide_addr(data->addr[i]);

	return n;
}

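/*
 * The #PF/#DB dance: kmemcheck_show() makes the tracked page(s) present
 * and sets the TF flag so that exactly one instruction -- the faulting
 * one -- executes before the CPU raises a debug trap, at which point
 * kmemcheck_hide() makes the page(s) non-present again. "balance" counts
 * shows minus hides and should therefore never exceed 1.
 */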
/*
 * Called from the #PF handler.
 */
void kmemcheck_show(struct pt_regs *regs)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	BUG_ON(!irqs_disabled());

	if (unlikely(data->balance != 0)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->balance = 0;
		return;
	}

	/*
	 * None of the addresses actually belonged to kmemcheck. Note that
	 * this is not an error.
	 */
	if (kmemcheck_show_all() == 0)
		return;

	++data->balance;

	/*
	 * The IF needs to be cleared as well, so that the faulting
	 * instruction can run "uninterrupted". Otherwise, we might take
	 * an interrupt and start executing that before we've had a chance
	 * to hide the page again.
	 *
	 * NOTE: In the rare case of multiple faults, we must not override
	 * the original flags:
	 */
	if (!(regs->flags & X86_EFLAGS_TF))
		data->flags = regs->flags;

	regs->flags |= X86_EFLAGS_TF;
	regs->flags &= ~X86_EFLAGS_IF;
}

/*
 * Called from the #DB handler.
 */
void kmemcheck_hide(struct pt_regs *regs)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
	int n;

	BUG_ON(!irqs_disabled());

	if (data->balance == 0)
		return;

	if (unlikely(data->balance != 1)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->n_addrs = 0;
		data->balance = 0;

		if (!(data->flags & X86_EFLAGS_TF))
			regs->flags &= ~X86_EFLAGS_TF;
		if (data->flags & X86_EFLAGS_IF)
			regs->flags |= X86_EFLAGS_IF;
		return;
	}

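	/*
	 * If kmemcheck has been disabled at runtime, leave the pages
	 * present instead of hiding them again; each tracked page then
	 * stops faulting after its first access.
	 */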
	if (kmemcheck_enabled)
		n = kmemcheck_hide_all();
	else
		n = kmemcheck_show_all();

	if (n == 0)
		return;

	--data->balance;

	data->n_addrs = 0;

	if (!(data->flags & X86_EFLAGS_TF))
		regs->flags &= ~X86_EFLAGS_TF;
	if (data->flags & X86_EFLAGS_IF)
		regs->flags |= X86_EFLAGS_IF;
}

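/*
 * Make the pages present and clear _PAGE_HIDDEN, the bit by which
 * kmemcheck_pte_lookup() recognizes pages that belong to kmemcheck.
 */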
void kmemcheck_show_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}

bool kmemcheck_page_is_tracked(struct page *p)
{
	/* This will also check the "hidden" flag of the PTE. */
	return kmemcheck_pte_lookup((unsigned long) page_address(p));
}

void kmemcheck_hide_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}

/* Access may NOT cross page boundary */
static void kmemcheck_read_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;
	enum kmemcheck_shadow status;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, addr, size, regs);

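	/* One-shot mode: disable again after the first report. */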
	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;

	/* Don't warn about it again. */
	kmemcheck_shadow_set(shadow, size);
}

/* Access may cross page boundary */
static void kmemcheck_read(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_read_strict(regs, addr, size);
		return;
	}

	/*
	 * What we do is basically to split the access across the
	 * two pages and handle each part separately. Yes, this means
	 * that we may now see reads that are 3 + 5 bytes, for
	 * example (and if both are uninitialized, there will be two
	 * reports), but it makes the code a lot simpler.
	 */
	kmemcheck_read_strict(regs, addr, next_page - addr);
	kmemcheck_read_strict(regs, next_page, next_addr - next_page);
}

static void kmemcheck_write_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	kmemcheck_shadow_set(shadow, size);
}

static void kmemcheck_write(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_write_strict(regs, addr, size);
		return;
	}

	/* See comment in kmemcheck_read(). */
	kmemcheck_write_strict(regs, addr, next_page - addr);
	kmemcheck_write_strict(regs, next_page, next_addr - next_page);
}

/*
 * Copying is hard. We have two addresses, each of which may be split across
 * a page (and each page will have different shadow addresses).
 */
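/*
 * Note the net effect: shadow bytes whose destination is also tracked
 * are propagated as-is and then marked initialized in the local copy, so
 * only uninitialized data escaping to untracked memory is reported at
 * the end.
 */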
static void kmemcheck_copy(struct pt_regs *regs,
	unsigned long src_addr, unsigned long dst_addr, unsigned int size)
{
	uint8_t shadow[8];
	enum kmemcheck_shadow status;

	unsigned long page;
	unsigned long next_addr;
	unsigned long next_page;

	uint8_t *x;
	unsigned int i;
	unsigned int n;

	BUG_ON(size > sizeof(shadow));

	page = src_addr & PAGE_MASK;
	next_addr = src_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < size; ++i)
				shadow[i] = x[i];
		} else {
			for (i = 0; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	} else {
		n = next_page - src_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < n; ++i)
				shadow[i] = x[i];
		} else {
			/* Not tracked */
			for (i = 0; i < n; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i)
				shadow[i] = x[i - n];
		} else {
			/* Not tracked */
			for (i = n; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	}

	page = dst_addr & PAGE_MASK;
	next_addr = dst_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < size; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	} else {
		n = next_page - dst_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < n; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i) {
				x[i - n] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	}

	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, src_addr, size, regs);

	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;
}

enum kmemcheck_method {
	KMEMCHECK_READ,
	KMEMCHECK_WRITE,
};

static void kmemcheck_access(struct pt_regs *regs,
	unsigned long fallback_address, enum kmemcheck_method fallback_method)
{
	const uint8_t *insn;
	const uint8_t *insn_primary;
	unsigned int size;

	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	/* Recursive fault -- ouch. */
	if (data->busy) {
		kmemcheck_show_addr(fallback_address);
		kmemcheck_error_save_bug(regs);
		return;
	}

	data->busy = true;

	insn = (const uint8_t *) regs->ip;
	insn_primary = kmemcheck_opcode_get_primary(insn);

	kmemcheck_opcode_decode(insn, &size);

	switch (insn_primary[0]) {
#ifdef CONFIG_KMEMCHECK_BITOPS_OK
		/* AND, OR, XOR */
		/*
		 * Unfortunately, these instructions have to be excluded from
		 * our regular checking since they access only some (and not
		 * all) bits. This clears out "bogus" bitfield-access warnings.
		 */
	case 0x80:
	case 0x81:
	case 0x82:
	case 0x83:
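		/*
		 * Opcodes 0x80-0x83 form "group 1": bits 3-5 of the ModRM
		 * byte (the byte after the primary opcode) select the
		 * actual operation -- ADD, OR, ADC, SBB, AND, SUB, XOR
		 * or CMP.
		 */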
		switch ((insn_primary[1] >> 3) & 7) {
			/* OR */
		case 1:
			/* AND */
		case 4:
			/* XOR */
		case 6:
			kmemcheck_write(regs, fallback_address, size);
			goto out;

			/* ADD */
		case 0:
			/* ADC */
		case 2:
			/* SBB */
		case 3:
			/* SUB */
		case 5:
			/* CMP */
		case 7:
			break;
		}
		break;
#endif

		/* MOVS, MOVSB, MOVSW, MOVSD */
	case 0xa4:
	case 0xa5:
		/*
		 * These instructions are special because they take two
		 * addresses, but we only get one page fault.
		 */
		kmemcheck_copy(regs, regs->si, regs->di, size);
		goto out;

		/* CMPS, CMPSB, CMPSW, CMPSD */
	case 0xa6:
	case 0xa7:
		kmemcheck_read(regs, regs->si, size);
		kmemcheck_read(regs, regs->di, size);
		goto out;
	}

	/*
	 * If the opcode isn't special in any way, we use the data from the
	 * page fault handler to determine the address and type of memory
	 * access.
	 */
	switch (fallback_method) {
	case KMEMCHECK_READ:
		kmemcheck_read(regs, fallback_address, size);
		goto out;
	case KMEMCHECK_WRITE:
		kmemcheck_write(regs, fallback_address, size);
		goto out;
	}

out:
	data->busy = false;
}

bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
	unsigned long error_code)
{
	pte_t *pte;

	/*
	 * XXX: Is it safe to assume that memory accesses from virtual 86
	 * mode or non-kernel code segments will _never_ access kernel
	 * memory (e.g. tracked pages)? For now, we need this to avoid
	 * invoking kmemcheck for PnP BIOS calls.
	 */
	if (regs->flags & X86_VM_MASK)
		return false;
	if (regs->cs != __KERNEL_CS)
		return false;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return false;

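	/* Bit 1 of the page fault error code is set for write accesses. */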
	if (error_code & 2)
		kmemcheck_access(regs, address, KMEMCHECK_WRITE);
	else
		kmemcheck_access(regs, address, KMEMCHECK_READ);

	kmemcheck_show(regs);
	return true;
}

bool kmemcheck_trap(struct pt_regs *regs)
{
	if (!kmemcheck_active(regs))
		return false;

	/* We're done. */
	kmemcheck_hide(regs);
	return true;
}