/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
#include <linux/types.h>
#include <linux/string.h>
#include <asm/page.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/module.h>

#include "vmx.h"
#include "kvm.h"

#undef MMU_DEBUG

#undef AUDIT

#ifdef AUDIT
static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg);
#else
static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
#endif

#ifdef MMU_DEBUG

#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)

#else

#define pgprintk(x...) do { } while (0)
#define rmap_printk(x...) do { } while (0)

#endif

#if defined(MMU_DEBUG) || defined(AUDIT)
static int dbg = 1;
#endif

#define ASSERT(x)							\
	if (!(x)) {							\
		printk(KERN_WARNING "assertion failed %s:%d: %s\n",	\
		       __FILE__, __LINE__, #x);				\
	}

#define PT64_PT_BITS 9
#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
#define PT32_PT_BITS 10
#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)

#define PT_WRITABLE_SHIFT 1

#define PT_PRESENT_MASK (1ULL << 0)
#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
#define PT_USER_MASK (1ULL << 2)
#define PT_PWT_MASK (1ULL << 3)
#define PT_PCD_MASK (1ULL << 4)
#define PT_ACCESSED_MASK (1ULL << 5)
#define PT_DIRTY_MASK (1ULL << 6)
#define PT_PAGE_SIZE_MASK (1ULL << 7)
#define PT_PAT_MASK (1ULL << 7)
#define PT_GLOBAL_MASK (1ULL << 8)
#define PT64_NX_MASK (1ULL << 63)

#define PT_PAT_SHIFT 7
#define PT_DIR_PAT_SHIFT 12
#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)

#define PT32_DIR_PSE36_SIZE 4
#define PT32_DIR_PSE36_SHIFT 13
#define PT32_DIR_PSE36_MASK (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)


#define PT32_PTE_COPY_MASK \
	(PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_GLOBAL_MASK)

#define PT64_PTE_COPY_MASK (PT64_NX_MASK | PT32_PTE_COPY_MASK)

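/*
 * The definitions below use the pte bits that the hardware leaves to
 * software (bits 9-11, and bit 52 and up in a 64-bit pte).  The shadow mmu
 * uses them to tag sptes that map emulated MMIO (PT_SHADOW_IO_MARK) and to
 * keep a copy of the guest's original writable/user permissions
 * (PT_SHADOW_WRITABLE_MASK, PT_SHADOW_USER_MASK): set_pte_common() stores
 * the guest bits shifted up by PT_SHADOW_BITS_OFFSET, and fix_read_pf()
 * reads them back to tell a genuine protection fault from one induced by
 * shadowing.
 */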
#define PT_FIRST_AVAIL_BITS_SHIFT 9
#define PT64_SECOND_AVAIL_BITS_SHIFT 52

#define PT_SHADOW_PS_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)

#define PT_SHADOW_WRITABLE_SHIFT (PT_FIRST_AVAIL_BITS_SHIFT + 1)
#define PT_SHADOW_WRITABLE_MASK (1ULL << PT_SHADOW_WRITABLE_SHIFT)

#define PT_SHADOW_USER_SHIFT (PT_SHADOW_WRITABLE_SHIFT + 1)
#define PT_SHADOW_USER_MASK (1ULL << (PT_SHADOW_USER_SHIFT))

#define PT_SHADOW_BITS_OFFSET (PT_SHADOW_WRITABLE_SHIFT - PT_WRITABLE_SHIFT)

#define VALID_PAGE(x) ((x) != INVALID_PAGE)

#define PT64_LEVEL_BITS 9

#define PT64_LEVEL_SHIFT(level) \
		( PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS )

#define PT64_LEVEL_MASK(level) \
		(((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level))

#define PT64_INDEX(address, level)\
	(((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))


#define PT32_LEVEL_BITS 10

#define PT32_LEVEL_SHIFT(level) \
		( PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS )

#define PT32_LEVEL_MASK(level) \
		(((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))

#define PT32_INDEX(address, level)\
	(((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))


#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
#define PT64_DIR_BASE_ADDR_MASK \
	(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))

#define PT32_BASE_ADDR_MASK PAGE_MASK
#define PT32_DIR_BASE_ADDR_MASK \
	(PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))


#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)
#define PFERR_FETCH_MASK (1U << 4)

#define PT64_ROOT_LEVEL 4
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3

#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1

#define RMAP_EXT 4

struct kvm_rmap_desc {
	u64 *shadow_ptes[RMAP_EXT];
	struct kvm_rmap_desc *more;
};

static struct kmem_cache *pte_chain_cache;
static struct kmem_cache *rmap_desc_cache;

static int is_write_protection(struct kvm_vcpu *vcpu)
{
	return vcpu->cr0 & CR0_WP_MASK;
}

static int is_cpuid_PSE36(void)
{
	return 1;
}

static int is_nx(struct kvm_vcpu *vcpu)
{
	return vcpu->shadow_efer & EFER_NX;
}

static int is_present_pte(unsigned long pte)
{
	return pte & PT_PRESENT_MASK;
}

static int is_writeble_pte(unsigned long pte)
{
	return pte & PT_WRITABLE_MASK;
}

static int is_io_pte(unsigned long pte)
{
	return pte & PT_SHADOW_IO_MARK;
}

static int is_rmap_pte(u64 pte)
{
	return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
		== (PT_WRITABLE_MASK | PT_PRESENT_MASK);
}

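/*
 * The per-vcpu memory caches below pre-allocate the pte_chain and
 * rmap_desc objects that shadow page table updates need, so that those
 * updates never have to sleep while holding vcpu->kvm->lock:
 * mmu_topup_memory_caches() first refills with GFP_NOWAIT and only drops
 * the lock (and the vcpu) to retry with GFP_KERNEL if that fails.
 */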
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
				  struct kmem_cache *base_cache, int min,
				  gfp_t gfp_flags)
{
	void *obj;

	if (cache->nobjs >= min)
		return 0;
	while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
		obj = kmem_cache_zalloc(base_cache, gfp_flags);
		if (!obj)
			return -ENOMEM;
		cache->objects[cache->nobjs++] = obj;
	}
	return 0;
}

static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
	while (mc->nobjs)
		kfree(mc->objects[--mc->nobjs]);
}

static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags)
{
	int r;

	r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
				   pte_chain_cache, 4, gfp_flags);
	if (r)
		goto out;
	r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
				   rmap_desc_cache, 1, gfp_flags);
out:
	return r;
}

static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
{
	int r;

	r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT);
	if (r < 0) {
		spin_unlock(&vcpu->kvm->lock);
		kvm_arch_ops->vcpu_put(vcpu);
		r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL);
		kvm_arch_ops->vcpu_load(vcpu);
		spin_lock(&vcpu->kvm->lock);
	}
	return r;
}

static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
	mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
	mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache);
}

static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
				    size_t size)
{
	void *p;

	BUG_ON(!mc->nobjs);
	p = mc->objects[--mc->nobjs];
	memset(p, 0, size);
	return p;
}

static void mmu_memory_cache_free(struct kvm_mmu_memory_cache *mc, void *obj)
{
	if (mc->nobjs < KVM_NR_MEM_OBJS)
		mc->objects[mc->nobjs++] = obj;
	else
		kfree(obj);
}

static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
{
	return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache,
				      sizeof(struct kvm_pte_chain));
}

static void mmu_free_pte_chain(struct kvm_vcpu *vcpu,
			       struct kvm_pte_chain *pc)
{
	mmu_memory_cache_free(&vcpu->mmu_pte_chain_cache, pc);
}

static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
{
	return mmu_memory_cache_alloc(&vcpu->mmu_rmap_desc_cache,
				      sizeof(struct kvm_rmap_desc));
}

static void mmu_free_rmap_desc(struct kvm_vcpu *vcpu,
			       struct kvm_rmap_desc *rd)
{
	mmu_memory_cache_free(&vcpu->mmu_rmap_desc_cache, rd);
}

/*
 * Reverse mapping data structures:
 *
 * If page->private bit zero is zero, then page->private points to the
 * shadow page table entry that points to page_address(page).
 *
 * If page->private bit zero is one, (then page->private & ~1) points
 * to a struct kvm_rmap_desc containing more mappings.
 */
static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
{
	struct page *page;
	struct kvm_rmap_desc *desc;
	int i;

	if (!is_rmap_pte(*spte))
		return;
	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
	if (!page_private(page)) {
		rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
		set_page_private(page, (unsigned long)spte);
	} else if (!(page_private(page) & 1)) {
		rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
		desc = mmu_alloc_rmap_desc(vcpu);
		desc->shadow_ptes[0] = (u64 *)page_private(page);
		desc->shadow_ptes[1] = spte;
		set_page_private(page, (unsigned long)desc | 1);
	} else {
		rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
		desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
			desc = desc->more;
		if (desc->shadow_ptes[RMAP_EXT-1]) {
			desc->more = mmu_alloc_rmap_desc(vcpu);
			desc = desc->more;
		}
		for (i = 0; desc->shadow_ptes[i]; ++i)
			;
		desc->shadow_ptes[i] = spte;
	}
}

static void rmap_desc_remove_entry(struct kvm_vcpu *vcpu,
				   struct page *page,
				   struct kvm_rmap_desc *desc,
				   int i,
				   struct kvm_rmap_desc *prev_desc)
{
	int j;

	for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)
		;
	desc->shadow_ptes[i] = desc->shadow_ptes[j];
	desc->shadow_ptes[j] = NULL;
	if (j != 0)
		return;
	if (!prev_desc && !desc->more)
		set_page_private(page, (unsigned long)desc->shadow_ptes[0]);
	else
		if (prev_desc)
			prev_desc->more = desc->more;
		else
			set_page_private(page, (unsigned long)desc->more | 1);
	mmu_free_rmap_desc(vcpu, desc);
}

static void rmap_remove(struct kvm_vcpu *vcpu, u64 *spte)
{
	struct page *page;
	struct kvm_rmap_desc *desc;
	struct kvm_rmap_desc *prev_desc;
	int i;

	if (!is_rmap_pte(*spte))
		return;
	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
	if (!page_private(page)) {
		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
		BUG();
	} else if (!(page_private(page) & 1)) {
		rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
		if ((u64 *)page_private(page) != spte) {
			printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
			       spte, *spte);
			BUG();
		}
		set_page_private(page, 0);
	} else {
		rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
		desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
		prev_desc = NULL;
		while (desc) {
			for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
				if (desc->shadow_ptes[i] == spte) {
					rmap_desc_remove_entry(vcpu, page,
							       desc, i,
							       prev_desc);
					return;
				}
			prev_desc = desc;
			desc = desc->more;
		}
		BUG();
	}
}

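/*
 * Remove write access from every shadow pte that currently maps @gfn, by
 * walking the page's reverse mapping.  Each spte is dropped from the rmap
 * (it no longer satisfies is_rmap_pte() once read-only) and the TLB is
 * flushed.  kvm_mmu_get_page() uses this when a guest page starts being
 * shadowed as a page table.
 */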
static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
{
	struct kvm *kvm = vcpu->kvm;
	struct page *page;
	struct kvm_rmap_desc *desc;
	u64 *spte;

	page = gfn_to_page(kvm, gfn);
	BUG_ON(!page);

	while (page_private(page)) {
		if (!(page_private(page) & 1))
			spte = (u64 *)page_private(page);
		else {
			desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
			spte = desc->shadow_ptes[0];
		}
		BUG_ON(!spte);
		BUG_ON((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT
		       != page_to_pfn(page));
		BUG_ON(!(*spte & PT_PRESENT_MASK));
		BUG_ON(!(*spte & PT_WRITABLE_MASK));
		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
		rmap_remove(vcpu, spte);
		kvm_arch_ops->tlb_flush(vcpu);
		*spte &= ~(u64)PT_WRITABLE_MASK;
	}
}

static int is_empty_shadow_page(hpa_t page_hpa)
{
	u64 *pos;
	u64 *end;

	for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u64);
	     pos != end; pos++)
		if (*pos != 0) {
			printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
			       pos, *pos);
			return 0;
		}
	return 1;
}

static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
{
	struct kvm_mmu_page *page_head = page_header(page_hpa);

	ASSERT(is_empty_shadow_page(page_hpa));
	page_head->page_hpa = page_hpa;
	list_move(&page_head->link, &vcpu->free_pages);
	++vcpu->kvm->n_free_mmu_pages;
}

static unsigned kvm_page_table_hashfn(gfn_t gfn)
{
	return gfn;
}

static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
					       u64 *parent_pte)
{
	struct kvm_mmu_page *page;

	if (list_empty(&vcpu->free_pages))
		return NULL;

	page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link);
	list_move(&page->link, &vcpu->kvm->active_mmu_pages);
	ASSERT(is_empty_shadow_page(page->page_hpa));
	page->slot_bitmap = 0;
	page->multimapped = 0;
	page->parent_pte = parent_pte;
	--vcpu->kvm->n_free_mmu_pages;
	return page;
}

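/*
 * A shadow page can be reached from more than one parent spte, for example
 * when the same guest page table is mapped by several higher-level tables.
 * A single parent is stored directly in page->parent_pte; as soon as a
 * second one is added, page->multimapped is set and all parents are kept
 * in an hlist of struct kvm_pte_chain instead.
 */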
static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
				    struct kvm_mmu_page *page, u64 *parent_pte)
{
	struct kvm_pte_chain *pte_chain;
	struct hlist_node *node;
	int i;

	if (!parent_pte)
		return;
	if (!page->multimapped) {
		u64 *old = page->parent_pte;

		if (!old) {
			page->parent_pte = parent_pte;
			return;
		}
		page->multimapped = 1;
		pte_chain = mmu_alloc_pte_chain(vcpu);
		INIT_HLIST_HEAD(&page->parent_ptes);
		hlist_add_head(&pte_chain->link, &page->parent_ptes);
		pte_chain->parent_ptes[0] = old;
	}
	hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link) {
		if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1])
			continue;
		for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i)
			if (!pte_chain->parent_ptes[i]) {
				pte_chain->parent_ptes[i] = parent_pte;
				return;
			}
	}
	pte_chain = mmu_alloc_pte_chain(vcpu);
	BUG_ON(!pte_chain);
	hlist_add_head(&pte_chain->link, &page->parent_ptes);
	pte_chain->parent_ptes[0] = parent_pte;
}

static void mmu_page_remove_parent_pte(struct kvm_vcpu *vcpu,
				       struct kvm_mmu_page *page,
				       u64 *parent_pte)
{
	struct kvm_pte_chain *pte_chain;
	struct hlist_node *node;
	int i;

	if (!page->multimapped) {
		BUG_ON(page->parent_pte != parent_pte);
		page->parent_pte = NULL;
		return;
	}
	hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link)
		for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
			if (!pte_chain->parent_ptes[i])
				break;
			if (pte_chain->parent_ptes[i] != parent_pte)
				continue;
			while (i + 1 < NR_PTE_CHAIN_ENTRIES
				&& pte_chain->parent_ptes[i + 1]) {
				pte_chain->parent_ptes[i]
					= pte_chain->parent_ptes[i + 1];
				++i;
			}
			pte_chain->parent_ptes[i] = NULL;
			if (i == 0) {
				hlist_del(&pte_chain->link);
				mmu_free_pte_chain(vcpu, pte_chain);
				if (hlist_empty(&page->parent_ptes)) {
					page->multimapped = 0;
					page->parent_pte = NULL;
				}
			}
			return;
		}
	BUG();
}

static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm_vcpu *vcpu,
						gfn_t gfn)
{
	unsigned index;
	struct hlist_head *bucket;
	struct kvm_mmu_page *page;
	struct hlist_node *node;

	pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
	bucket = &vcpu->kvm->mmu_page_hash[index];
	hlist_for_each_entry(page, node, bucket, hash_link)
		if (page->gfn == gfn && !page->role.metaphysical) {
			pgprintk("%s: found role %x\n",
				 __FUNCTION__, page->role.word);
			return page;
		}
	return NULL;
}

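/*
 * Find or create the shadow page describing @gfn with the given role.  The
 * role records the guest paging level, the shadow level and, for 32-bit
 * guest page tables, a quadrant: a 32-bit guest table holds twice as many
 * entries as a 64-bit shadow table, so several shadow pages (one per
 * quadrant of the guest page) can shadow the same gfn.  Freshly shadowed
 * non-metaphysical pages are write protected through rmap_write_protect().
 */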
static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
					     gfn_t gfn,
					     gva_t gaddr,
					     unsigned level,
					     int metaphysical,
					     unsigned hugepage_access,
					     u64 *parent_pte)
{
	union kvm_mmu_page_role role;
	unsigned index;
	unsigned quadrant;
	struct hlist_head *bucket;
	struct kvm_mmu_page *page;
	struct hlist_node *node;

	role.word = 0;
	role.glevels = vcpu->mmu.root_level;
	role.level = level;
	role.metaphysical = metaphysical;
	role.hugepage_access = hugepage_access;
	if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) {
		quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
		role.quadrant = quadrant;
	}
	pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__,
		 gfn, role.word);
	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
	bucket = &vcpu->kvm->mmu_page_hash[index];
	hlist_for_each_entry(page, node, bucket, hash_link)
		if (page->gfn == gfn && page->role.word == role.word) {
			mmu_page_add_parent_pte(vcpu, page, parent_pte);
			pgprintk("%s: found\n", __FUNCTION__);
			return page;
		}
	page = kvm_mmu_alloc_page(vcpu, parent_pte);
	if (!page)
		return page;
	pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word);
	page->gfn = gfn;
	page->role = role;
	hlist_add_head(&page->hash_link, bucket);
	if (!metaphysical)
		rmap_write_protect(vcpu, gfn);
	return page;
}

static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu,
					 struct kvm_mmu_page *page)
{
	unsigned i;
	u64 *pt;
	u64 ent;

	pt = __va(page->page_hpa);

	if (page->role.level == PT_PAGE_TABLE_LEVEL) {
		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
			if (pt[i] & PT_PRESENT_MASK)
				rmap_remove(vcpu, &pt[i]);
			pt[i] = 0;
		}
		kvm_arch_ops->tlb_flush(vcpu);
		return;
	}

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
		ent = pt[i];

		pt[i] = 0;
		if (!(ent & PT_PRESENT_MASK))
			continue;
		ent &= PT64_BASE_ADDR_MASK;
		mmu_page_remove_parent_pte(vcpu, page_header(ent), &pt[i]);
	}
}

static void kvm_mmu_put_page(struct kvm_vcpu *vcpu,
			     struct kvm_mmu_page *page,
			     u64 *parent_pte)
{
	mmu_page_remove_parent_pte(vcpu, page, parent_pte);
}

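/*
 * Tear down a shadow page: clear every parent spte pointing at it, unlink
 * its children (removing rmap entries for leaf tables), then either return
 * it to the free list or, if it is still referenced as a root
 * (root_count != 0), leave it on the active list until the root goes away.
 */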
static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu,
			     struct kvm_mmu_page *page)
{
	u64 *parent_pte;

	while (page->multimapped || page->parent_pte) {
		if (!page->multimapped)
			parent_pte = page->parent_pte;
		else {
			struct kvm_pte_chain *chain;

			chain = container_of(page->parent_ptes.first,
					     struct kvm_pte_chain, link);
			parent_pte = chain->parent_ptes[0];
		}
		BUG_ON(!parent_pte);
		kvm_mmu_put_page(vcpu, page, parent_pte);
		*parent_pte = 0;
	}
	kvm_mmu_page_unlink_children(vcpu, page);
	if (!page->root_count) {
		hlist_del(&page->hash_link);
		kvm_mmu_free_page(vcpu, page->page_hpa);
	} else
		list_move(&page->link, &vcpu->kvm->active_mmu_pages);
}

static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	unsigned index;
	struct hlist_head *bucket;
	struct kvm_mmu_page *page;
	struct hlist_node *node, *n;
	int r;

	pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
	r = 0;
	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
	bucket = &vcpu->kvm->mmu_page_hash[index];
	hlist_for_each_entry_safe(page, node, n, bucket, hash_link)
		if (page->gfn == gfn && !page->role.metaphysical) {
			pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
				 page->role.word);
			kvm_mmu_zap_page(vcpu, page);
			r = 1;
		}
	return r;
}

static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
{
	int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT));
	struct kvm_mmu_page *page_head = page_header(__pa(pte));

	__set_bit(slot, &page_head->slot_bitmap);
}

hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
	hpa_t hpa = gpa_to_hpa(vcpu, gpa);

	return is_error_hpa(hpa) ? bad_page_address | (gpa & ~PAGE_MASK) : hpa;
}

hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
	struct page *page;

	ASSERT((gpa & HPA_ERR_MASK) == 0);
	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
	if (!page)
		return gpa | HPA_ERR_MASK;
	return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT)
		| (gpa & (PAGE_SIZE-1));
}

hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
{
	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);

	if (gpa == UNMAPPED_GVA)
		return UNMAPPED_GVA;
	return gpa_to_hpa(vcpu, gpa);
}

struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
{
	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);

	if (gpa == UNMAPPED_GVA)
		return NULL;
	return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT);
}

static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}

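/*
 * With guest paging disabled, guest addresses are mapped one-to-one
 * (gva == gpa).  Walk the shadow table from the PAE-format root, creating
 * intermediate shadow pages as needed, and install a writable leaf pte
 * mapping @v to host physical address @p.
 */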
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
{
	int level = PT32E_ROOT_LEVEL;
	hpa_t table_addr = vcpu->mmu.root_hpa;

	for (; ; level--) {
		u32 index = PT64_INDEX(v, level);
		u64 *table;
		u64 pte;

		ASSERT(VALID_PAGE(table_addr));
		table = __va(table_addr);

		if (level == 1) {
			pte = table[index];
			if (is_present_pte(pte) && is_writeble_pte(pte))
				return 0;
			mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
			page_header_update_slot(vcpu->kvm, table, v);
			table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
								PT_USER_MASK;
			rmap_add(vcpu, &table[index]);
			return 0;
		}

		if (table[index] == 0) {
			struct kvm_mmu_page *new_table;
			gfn_t pseudo_gfn;

			pseudo_gfn = (v & PT64_DIR_BASE_ADDR_MASK)
				>> PAGE_SHIFT;
			new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
						     v, level - 1,
						     1, 0, &table[index]);
			if (!new_table) {
				pgprintk("nonpaging_map: ENOMEM\n");
				return -ENOMEM;
			}

			table[index] = new_table->page_hpa | PT_PRESENT_MASK
				| PT_WRITABLE_MASK | PT_USER_MASK;
		}
		table_addr = table[index] & PT64_BASE_ADDR_MASK;
	}
}

static void mmu_free_roots(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_mmu_page *page;

#ifdef CONFIG_X86_64
	if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
		hpa_t root = vcpu->mmu.root_hpa;

		ASSERT(VALID_PAGE(root));
		page = page_header(root);
		--page->root_count;
		vcpu->mmu.root_hpa = INVALID_PAGE;
		return;
	}
#endif
	for (i = 0; i < 4; ++i) {
		hpa_t root = vcpu->mmu.pae_root[i];

		if (root) {
			ASSERT(VALID_PAGE(root));
			root &= PT64_BASE_ADDR_MASK;
			page = page_header(root);
			--page->root_count;
		}
		vcpu->mmu.pae_root[i] = INVALID_PAGE;
	}
	vcpu->mmu.root_hpa = INVALID_PAGE;
}

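/*
 * Install the shadow root(s) for the current guest cr3.  With a 4-level
 * shadow mmu (x86_64) a single root shadow page is used; otherwise four
 * PAE roots are set up, one per 1GB quadrant of the guest address space,
 * taken from the guest pdptrs when the guest itself is in PAE mode.
 */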
static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
{
	int i;
	gfn_t root_gfn;
	struct kvm_mmu_page *page;

	root_gfn = vcpu->cr3 >> PAGE_SHIFT;

#ifdef CONFIG_X86_64
	if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
		hpa_t root = vcpu->mmu.root_hpa;

		ASSERT(!VALID_PAGE(root));
		page = kvm_mmu_get_page(vcpu, root_gfn, 0,
					PT64_ROOT_LEVEL, 0, 0, NULL);
		root = page->page_hpa;
		++page->root_count;
		vcpu->mmu.root_hpa = root;
		return;
	}
#endif
	for (i = 0; i < 4; ++i) {
		hpa_t root = vcpu->mmu.pae_root[i];

		ASSERT(!VALID_PAGE(root));
		if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) {
			if (!is_present_pte(vcpu->pdptrs[i])) {
				vcpu->mmu.pae_root[i] = 0;
				continue;
			}
			root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT;
		} else if (vcpu->mmu.root_level == 0)
			root_gfn = 0;
		page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
					PT32_ROOT_LEVEL, !is_paging(vcpu),
					0, NULL);
		root = page->page_hpa;
		++page->root_count;
		vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
	}
	vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root);
}

static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
{
	return vaddr;
}

static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
				u32 error_code)
{
	gpa_t addr = gva;
	hpa_t paddr;
	int r;

	r = mmu_topup_memory_caches(vcpu);
	if (r)
		return r;

	ASSERT(vcpu);
	ASSERT(VALID_PAGE(vcpu->mmu.root_hpa));

	paddr = gpa_to_hpa(vcpu, addr & PT64_BASE_ADDR_MASK);

	if (is_error_hpa(paddr))
		return 1;

	return nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
}

static void nonpaging_free(struct kvm_vcpu *vcpu)
{
	mmu_free_roots(vcpu);
}

static int nonpaging_init_context(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu *context = &vcpu->mmu;

	context->new_cr3 = nonpaging_new_cr3;
	context->page_fault = nonpaging_page_fault;
	context->gva_to_gpa = nonpaging_gva_to_gpa;
	context->free = nonpaging_free;
	context->root_level = 0;
	context->shadow_root_level = PT32E_ROOT_LEVEL;
	mmu_alloc_roots(vcpu);
	ASSERT(VALID_PAGE(context->root_hpa));
	kvm_arch_ops->set_cr3(vcpu, context->root_hpa);
	return 0;
}

static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
{
	++kvm_stat.tlb_flush;
	kvm_arch_ops->tlb_flush(vcpu);
}

static void paging_new_cr3(struct kvm_vcpu *vcpu)
{
	pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
	mmu_free_roots(vcpu);
	if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
		kvm_mmu_free_some_pages(vcpu);
	mmu_alloc_roots(vcpu);
	kvm_mmu_flush_tlb(vcpu);
	kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
}

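/*
 * Common leaf-spte setup shared with the paging_tmpl.h instantiations
 * included further down.  The guest access bits are stashed in the shadow
 * "available" bits, an spte that targets emulated MMIO (error hpa) is made
 * not-present and tagged with PT_SHADOW_IO_MARK, and a writable mapping of
 * a page that is itself shadowed as a page table is downgraded to
 * read-only so that guest writes to it keep trapping.
 */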
static inline void set_pte_common(struct kvm_vcpu *vcpu,
				  u64 *shadow_pte,
				  gpa_t gaddr,
				  int dirty,
				  u64 access_bits,
				  gfn_t gfn)
{
	hpa_t paddr;

	*shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET;
	if (!dirty)
		access_bits &= ~PT_WRITABLE_MASK;

	paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);

	*shadow_pte |= access_bits;

	if (is_error_hpa(paddr)) {
		*shadow_pte |= gaddr;
		*shadow_pte |= PT_SHADOW_IO_MARK;
		*shadow_pte &= ~PT_PRESENT_MASK;
		return;
	}

	*shadow_pte |= paddr;

	if (access_bits & PT_WRITABLE_MASK) {
		struct kvm_mmu_page *shadow;

		shadow = kvm_mmu_lookup_page(vcpu, gfn);
		if (shadow) {
			pgprintk("%s: found shadow page for %lx, marking ro\n",
				 __FUNCTION__, gfn);
			access_bits &= ~PT_WRITABLE_MASK;
			if (is_writeble_pte(*shadow_pte)) {
				*shadow_pte &= ~PT_WRITABLE_MASK;
				kvm_arch_ops->tlb_flush(vcpu);
			}
		}
	}

	if (access_bits & PT_WRITABLE_MASK)
		mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);

	page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
	rmap_add(vcpu, shadow_pte);
}

static void inject_page_fault(struct kvm_vcpu *vcpu,
			      u64 addr,
			      u32 err_code)
{
	kvm_arch_ops->inject_page_fault(vcpu, addr, err_code);
}

static inline int fix_read_pf(u64 *shadow_ent)
{
	if ((*shadow_ent & PT_SHADOW_USER_MASK) &&
	    !(*shadow_ent & PT_USER_MASK)) {
		/*
		 * If supervisor write protect is disabled, we shadow kernel
		 * pages as user pages so we can trap the write access.
		 */
		*shadow_ent |= PT_USER_MASK;
		*shadow_ent &= ~PT_WRITABLE_MASK;

		return 1;
	}
	return 0;
}

static void paging_free(struct kvm_vcpu *vcpu)
{
	nonpaging_free(vcpu);
}

#define PTTYPE 64
#include "paging_tmpl.h"
#undef PTTYPE

#define PTTYPE 32
#include "paging_tmpl.h"
#undef PTTYPE

static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
{
	struct kvm_mmu *context = &vcpu->mmu;

	ASSERT(is_pae(vcpu));
	context->new_cr3 = paging_new_cr3;
	context->page_fault = paging64_page_fault;
	context->gva_to_gpa = paging64_gva_to_gpa;
	context->free = paging_free;
	context->root_level = level;
	context->shadow_root_level = level;
	mmu_alloc_roots(vcpu);
	ASSERT(VALID_PAGE(context->root_hpa));
	kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
		    (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
	return 0;
}

static int paging64_init_context(struct kvm_vcpu *vcpu)
{
	return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
}

static int paging32_init_context(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu *context = &vcpu->mmu;

	context->new_cr3 = paging_new_cr3;
	context->page_fault = paging32_page_fault;
	context->gva_to_gpa = paging32_gva_to_gpa;
	context->free = paging_free;
	context->root_level = PT32_ROOT_LEVEL;
	context->shadow_root_level = PT32E_ROOT_LEVEL;
	mmu_alloc_roots(vcpu);
	ASSERT(VALID_PAGE(context->root_hpa));
	kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
		    (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
	return 0;
}

static int paging32E_init_context(struct kvm_vcpu *vcpu)
{
	return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
}

static int init_kvm_mmu(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);
	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));

	if (!is_paging(vcpu))
		return nonpaging_init_context(vcpu);
	else if (is_long_mode(vcpu))
		return paging64_init_context(vcpu);
	else if (is_pae(vcpu))
		return paging32E_init_context(vcpu);
	else
		return paging32_init_context(vcpu);
}

static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);
	if (VALID_PAGE(vcpu->mmu.root_hpa)) {
		vcpu->mmu.free(vcpu);
		vcpu->mmu.root_hpa = INVALID_PAGE;
	}
}

int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
{
	int r;

	destroy_kvm_mmu(vcpu);
	r = init_kvm_mmu(vcpu);
	if (r < 0)
		goto out;
	r = mmu_topup_memory_caches(vcpu);
out:
	return r;
}

static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu,
				  struct kvm_mmu_page *page,
				  u64 *spte)
{
	u64 pte;
	struct kvm_mmu_page *child;

	pte = *spte;
	if (is_present_pte(pte)) {
		if (page->role.level == PT_PAGE_TABLE_LEVEL)
			rmap_remove(vcpu, spte);
		else {
			child = page_header(pte & PT64_BASE_ADDR_MASK);
			mmu_page_remove_parent_pte(vcpu, child, spte);
		}
	}
	*spte = 0;
}

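/*
 * Keep the shadow tables coherent with guest writes: when the written gfn
 * is shadowed as a page table, zap the sptes covered by the write.  A page
 * that sees misaligned writes, or too many writes in a row (write
 * flooding), is zapped wholesale since it is probably no longer used as a
 * page table.
 */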
void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
{
	gfn_t gfn = gpa >> PAGE_SHIFT;
	struct kvm_mmu_page *page;
	struct hlist_node *node, *n;
	struct hlist_head *bucket;
	unsigned index;
	u64 *spte;
	unsigned offset = offset_in_page(gpa);
	unsigned pte_size;
	unsigned page_offset;
	unsigned misaligned;
	int level;
	int flooded = 0;
	int npte;

	pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
	if (gfn == vcpu->last_pt_write_gfn) {
		++vcpu->last_pt_write_count;
		if (vcpu->last_pt_write_count >= 3)
			flooded = 1;
	} else {
		vcpu->last_pt_write_gfn = gfn;
		vcpu->last_pt_write_count = 1;
	}
	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
	bucket = &vcpu->kvm->mmu_page_hash[index];
	hlist_for_each_entry_safe(page, node, n, bucket, hash_link) {
		if (page->gfn != gfn || page->role.metaphysical)
			continue;
		pte_size = page->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
		if (misaligned || flooded) {
			/*
			 * Misaligned accesses are too much trouble to fix
			 * up; also, they usually indicate a page is not used
			 * as a page table.
			 *
			 * If we're seeing too many writes to a page,
			 * it may no longer be a page table, or we may be
			 * forking, in which case it is better to unmap the
			 * page.
			 */
			pgprintk("misaligned: gpa %llx bytes %d role %x\n",
				 gpa, bytes, page->role.word);
			kvm_mmu_zap_page(vcpu, page);
			continue;
		}
		page_offset = offset;
		level = page->role.level;
		npte = 1;
		if (page->role.glevels == PT32_ROOT_LEVEL) {
			page_offset <<= 1;	/* 32->64 */
			/*
			 * A 32-bit pde maps 4MB while the shadow pdes map
			 * only 2MB.  So we need to double the offset again
			 * and zap two pdes instead of one.
			 */
			if (level == PT32_ROOT_LEVEL) {
				page_offset &= ~7; /* kill rounding error */
				page_offset <<= 1;
				npte = 2;
			}
			page_offset &= ~PAGE_MASK;
		}
		spte = __va(page->page_hpa);
		spte += page_offset / sizeof(*spte);
		while (npte--) {
			mmu_pre_write_zap_pte(vcpu, page, spte);
			++spte;
		}
	}
}

void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
{
}

int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
{
	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);

	return kvm_mmu_unprotect_page(vcpu, gpa >> PAGE_SHIFT);
}

void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
{
	while (vcpu->kvm->n_free_mmu_pages < KVM_REFILL_PAGES) {
		struct kvm_mmu_page *page;

		page = container_of(vcpu->kvm->active_mmu_pages.prev,
				    struct kvm_mmu_page, link);
		kvm_mmu_zap_page(vcpu, page);
	}
}
EXPORT_SYMBOL_GPL(kvm_mmu_free_some_pages);

static void free_mmu_pages(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu_page *page;

	while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
		page = container_of(vcpu->kvm->active_mmu_pages.next,
				    struct kvm_mmu_page, link);
		kvm_mmu_zap_page(vcpu, page);
	}
	while (!list_empty(&vcpu->free_pages)) {
		page = list_entry(vcpu->free_pages.next,
				  struct kvm_mmu_page, link);
		list_del(&page->link);
		__free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT));
		page->page_hpa = INVALID_PAGE;
	}
	free_page((unsigned long)vcpu->mmu.pae_root);
}

static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
{
	struct page *page;
	int i;

	ASSERT(vcpu);

	for (i = 0; i < KVM_NUM_MMU_PAGES; i++) {
		struct kvm_mmu_page *page_header = &vcpu->page_header_buf[i];

		INIT_LIST_HEAD(&page_header->link);
		if ((page = alloc_page(GFP_KERNEL)) == NULL)
			goto error_1;
		set_page_private(page, (unsigned long)page_header);
		page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
		memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
		list_add(&page_header->link, &vcpu->free_pages);
		++vcpu->kvm->n_free_mmu_pages;
	}

	/*
	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
	 * Therefore we need to allocate shadow page tables in the first
	 * 4GB of memory, which happens to fit the DMA32 zone.
	 */
	page = alloc_page(GFP_KERNEL | __GFP_DMA32);
	if (!page)
		goto error_1;
	vcpu->mmu.pae_root = page_address(page);
	for (i = 0; i < 4; ++i)
		vcpu->mmu.pae_root[i] = INVALID_PAGE;

	return 0;

error_1:
	free_mmu_pages(vcpu);
	return -ENOMEM;
}

int kvm_mmu_create(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);
	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
	ASSERT(list_empty(&vcpu->free_pages));

	return alloc_mmu_pages(vcpu);
}

int kvm_mmu_setup(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);
	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
	ASSERT(!list_empty(&vcpu->free_pages));

	return init_kvm_mmu(vcpu);
}

void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);

	destroy_kvm_mmu(vcpu);
	free_mmu_pages(vcpu);
	mmu_free_memory_caches(vcpu);
}

void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_page *page;

	list_for_each_entry(page, &kvm->active_mmu_pages, link) {
		int i;
		u64 *pt;

		if (!test_bit(slot, &page->slot_bitmap))
			continue;

		pt = __va(page->page_hpa);
		for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
			/* avoid RMW */
			if (pt[i] & PT_WRITABLE_MASK) {
				rmap_remove(vcpu, &pt[i]);
				pt[i] &= ~PT_WRITABLE_MASK;
			}
	}
}

void kvm_mmu_zap_all(struct kvm_vcpu *vcpu)
{
	destroy_kvm_mmu(vcpu);

	while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
		struct kvm_mmu_page *page;

		page = container_of(vcpu->kvm->active_mmu_pages.next,
				    struct kvm_mmu_page, link);
		kvm_mmu_zap_page(vcpu, page);
	}

	mmu_free_memory_caches(vcpu);
	kvm_arch_ops->tlb_flush(vcpu);
	init_kvm_mmu(vcpu);
}

void kvm_mmu_module_exit(void)
{
	if (pte_chain_cache)
		kmem_cache_destroy(pte_chain_cache);
	if (rmap_desc_cache)
		kmem_cache_destroy(rmap_desc_cache);
}

int kvm_mmu_module_init(void)
{
	pte_chain_cache = kmem_cache_create("kvm_pte_chain",
					    sizeof(struct kvm_pte_chain),
					    0, 0, NULL, NULL);
	if (!pte_chain_cache)
		goto nomem;
	rmap_desc_cache = kmem_cache_create("kvm_rmap_desc",
					    sizeof(struct kvm_rmap_desc),
					    0, 0, NULL, NULL);
	if (!rmap_desc_cache)
		goto nomem;

	return 0;

nomem:
	kvm_mmu_module_exit();
	return -ENOMEM;
}

#ifdef AUDIT

static const char *audit_msg;

static gva_t canonicalize(gva_t gva)
{
#ifdef CONFIG_X86_64
	gva = (long long)(gva << 16) >> 16;
#endif
	return gva;
}

static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
				gva_t va, int level)
{
	u64 *pt = __va(page_pte & PT64_BASE_ADDR_MASK);
	int i;
	gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1));

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
		u64 ent = pt[i];

		if (!(ent & PT_PRESENT_MASK))
			continue;

		va = canonicalize(va);
		if (level > 1)
			audit_mappings_page(vcpu, ent, va, level - 1);
		else {
			gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va);
			hpa_t hpa = gpa_to_hpa(vcpu, gpa);

			if ((ent & PT_PRESENT_MASK)
			    && (ent & PT64_BASE_ADDR_MASK) != hpa)
				printk(KERN_ERR "audit error: (%s) levels %d"
				       " gva %lx gpa %llx hpa %llx ent %llx\n",
				       audit_msg, vcpu->mmu.root_level,
				       va, gpa, hpa, ent);
		}
	}
}

static void audit_mappings(struct kvm_vcpu *vcpu)
{
	unsigned i;

	if (vcpu->mmu.root_level == 4)
		audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);
	else
		for (i = 0; i < 4; ++i)
			if (vcpu->mmu.pae_root[i] & PT_PRESENT_MASK)
				audit_mappings_page(vcpu,
						    vcpu->mmu.pae_root[i],
						    i << 30,
						    2);
}

static int count_rmaps(struct kvm_vcpu *vcpu)
{
	int nmaps = 0;
	int i, j, k;

	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
		struct kvm_memory_slot *m = &vcpu->kvm->memslots[i];
		struct kvm_rmap_desc *d;

		for (j = 0; j < m->npages; ++j) {
			struct page *page = m->phys_mem[j];

			if (!page->private)
				continue;
			if (!(page->private & 1)) {
				++nmaps;
				continue;
			}
			d = (struct kvm_rmap_desc *)(page->private & ~1ul);
			while (d) {
				for (k = 0; k < RMAP_EXT; ++k)
					if (d->shadow_ptes[k])
						++nmaps;
					else
						break;
				d = d->more;
			}
		}
	}
	return nmaps;
}

static int count_writable_mappings(struct kvm_vcpu *vcpu)
{
	int nmaps = 0;
	struct kvm_mmu_page *page;
	int i;

	list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
		u64 *pt = __va(page->page_hpa);

		if (page->role.level != PT_PAGE_TABLE_LEVEL)
			continue;

		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
			u64 ent = pt[i];

			if (!(ent & PT_PRESENT_MASK))
				continue;
			if (!(ent & PT_WRITABLE_MASK))
				continue;
			++nmaps;
		}
	}
	return nmaps;
}

static void audit_rmap(struct kvm_vcpu *vcpu)
{
	int n_rmap = count_rmaps(vcpu);
	int n_actual = count_writable_mappings(vcpu);

	if (n_rmap != n_actual)
		printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
		       __FUNCTION__, audit_msg, n_rmap, n_actual);
}

static void audit_write_protection(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu_page *page;

	list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
		hfn_t hfn;
		struct page *pg;

		if (page->role.metaphysical)
			continue;

		hfn = gpa_to_hpa(vcpu, (gpa_t)page->gfn << PAGE_SHIFT)
			>> PAGE_SHIFT;
		pg = pfn_to_page(hfn);
		if (pg->private)
			printk(KERN_ERR "%s: (%s) shadow page has writable"
			       " mappings: gfn %lx role %x\n",
			       __FUNCTION__, audit_msg, page->gfn,
			       page->role.word);
	}
}

static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg)
{
	int olddbg = dbg;

	dbg = 0;
	audit_msg = msg;
	audit_rmap(vcpu);
	audit_write_protection(vcpu);
	audit_mappings(vcpu);
	dbg = olddbg;
}

#endif