/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
#include <linux/types.h>
#include <linux/string.h>
#include <asm/page.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/module.h>

#include "vmx.h"
#include "kvm.h"

#define pgprintk(x...) do { } while (0)

#define ASSERT(x)							\
	do {								\
		if (!(x))						\
			printk(KERN_WARNING "assertion failed %s:%d: %s\n", \
			       __FILE__, __LINE__, #x);			\
	} while (0)

#define PT64_ENT_PER_PAGE 512
#define PT32_ENT_PER_PAGE 1024

#define PT_WRITABLE_SHIFT 1

#define PT_PRESENT_MASK (1ULL << 0)
#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
#define PT_USER_MASK (1ULL << 2)
#define PT_PWT_MASK (1ULL << 3)
#define PT_PCD_MASK (1ULL << 4)
#define PT_ACCESSED_MASK (1ULL << 5)
#define PT_DIRTY_MASK (1ULL << 6)
#define PT_PAGE_SIZE_MASK (1ULL << 7)
#define PT_PAT_MASK (1ULL << 7)
#define PT_GLOBAL_MASK (1ULL << 8)
#define PT64_NX_MASK (1ULL << 63)

#define PT_PAT_SHIFT 7
#define PT_DIR_PAT_SHIFT 12
#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)

#define PT32_DIR_PSE36_SIZE 4
#define PT32_DIR_PSE36_SHIFT 13
#define PT32_DIR_PSE36_MASK \
	(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)


#define PT32_PTE_COPY_MASK \
	(PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_GLOBAL_MASK)

#define PT64_PTE_COPY_MASK (PT64_NX_MASK | PT32_PTE_COPY_MASK)

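/*
 * Software-available bits in the shadow pte (bits 9-11).  Bit 9 doubles
 * as the "large page" mark and the "mmio page" mark; bits 10 and 11
 * preserve the guest's original writable and user permissions, shifted
 * up by PT_SHADOW_BITS_OFFSET from their architectural positions.
 */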
#define PT_FIRST_AVAIL_BITS_SHIFT 9
#define PT64_SECOND_AVAIL_BITS_SHIFT 52

#define PT_SHADOW_PS_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)

#define PT_SHADOW_WRITABLE_SHIFT (PT_FIRST_AVAIL_BITS_SHIFT + 1)
#define PT_SHADOW_WRITABLE_MASK (1ULL << PT_SHADOW_WRITABLE_SHIFT)

#define PT_SHADOW_USER_SHIFT (PT_SHADOW_WRITABLE_SHIFT + 1)
#define PT_SHADOW_USER_MASK (1ULL << (PT_SHADOW_USER_SHIFT))

#define PT_SHADOW_BITS_OFFSET (PT_SHADOW_WRITABLE_SHIFT - PT_WRITABLE_SHIFT)

#define VALID_PAGE(x) ((x) != INVALID_PAGE)

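/*
 * With 64-bit ptes each page table level resolves 9 bits of the guest
 * virtual address (512 entries per page); with 32-bit ptes it resolves
 * 10 bits (1024 entries).  PT*_INDEX() extracts the index for a given
 * level, counting level 1 as the page table closest to the data page.
 */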
#define PT64_LEVEL_BITS 9

#define PT64_LEVEL_SHIFT(level) \
		(PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS)

#define PT64_LEVEL_MASK(level) \
		(((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level))

#define PT64_INDEX(address, level)\
	(((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))


#define PT32_LEVEL_BITS 10

#define PT32_LEVEL_SHIFT(level) \
		(PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS)

#define PT32_LEVEL_MASK(level) \
		(((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))

#define PT32_INDEX(address, level)\
	(((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))


#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & PAGE_MASK)
#define PT64_DIR_BASE_ADDR_MASK \
	(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))

#define PT32_BASE_ADDR_MASK PAGE_MASK
#define PT32_DIR_BASE_ADDR_MASK \
	(PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))


#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)

#define PT64_ROOT_LEVEL 4
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3

#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1

static int is_write_protection(struct kvm_vcpu *vcpu)
{
	return vcpu->cr0 & CR0_WP_MASK;
}

static int is_cpuid_PSE36(void)
{
	return 1;
}

static int is_present_pte(unsigned long pte)
{
	return pte & PT_PRESENT_MASK;
}

static int is_writeble_pte(unsigned long pte)
{
	return pte & PT_WRITABLE_MASK;
}

static int is_io_pte(unsigned long pte)
{
	return pte & PT_SHADOW_IO_MARK;
}

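/* Return a shadow page to the vcpu's free list. */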
static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
{
	struct kvm_mmu_page *page_head = page_header(page_hpa);

	list_del(&page_head->link);
	page_head->page_hpa = page_hpa;
	list_add(&page_head->link, &vcpu->free_pages);
}

static int is_empty_shadow_page(hpa_t page_hpa)
{
	u32 *pos;
	u32 *end;

	for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u32);
	     pos != end; pos++)
		if (*pos != 0)
			return 0;
	return 1;
}

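/*
 * Take a page off the vcpu's free list, move it to the active list and
 * record which shadow pte points at it.  Returns INVALID_PAGE if the
 * free list is exhausted.
 */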
static hpa_t kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte)
{
	struct kvm_mmu_page *page;

	if (list_empty(&vcpu->free_pages))
		return INVALID_PAGE;

	page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link);
	list_del(&page->link);
	list_add(&page->link, &vcpu->kvm->active_mmu_pages);
	ASSERT(is_empty_shadow_page(page->page_hpa));
	page->slot_bitmap = 0;
	page->global = 1;
	page->parent_pte = parent_pte;
	return page->page_hpa;
}

static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
{
	int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT));
	struct kvm_mmu_page *page_head = page_header(__pa(pte));

	__set_bit(slot, &page_head->slot_bitmap);
}

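/*
 * Translate a guest physical address to a host physical address.  The
 * safe variant maps addresses without a backing memory slot to the
 * dedicated bad page instead of returning an error hpa.
 */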
hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
	hpa_t hpa = gpa_to_hpa(vcpu, gpa);

	return is_error_hpa(hpa) ? bad_page_address | (gpa & ~PAGE_MASK) : hpa;
}

hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
	struct kvm_memory_slot *slot;
	struct page *page;

	ASSERT((gpa & HPA_ERR_MASK) == 0);
	slot = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT);
	if (!slot)
		return gpa | HPA_ERR_MASK;
	page = gfn_to_page(slot, gpa >> PAGE_SHIFT);
	return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT)
		| (gpa & (PAGE_SIZE-1));
}

hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
{
	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);

	if (gpa == UNMAPPED_GVA)
		return UNMAPPED_GVA;
	return gpa_to_hpa(vcpu, gpa);
}

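/*
 * Recursively zero out a shadow page table and return all of its pages
 * to the free list.
 */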
static void release_pt_page_64(struct kvm_vcpu *vcpu, hpa_t page_hpa,
			       int level)
{
	ASSERT(vcpu);
	ASSERT(VALID_PAGE(page_hpa));
	ASSERT(level <= PT64_ROOT_LEVEL && level > 0);

	if (level == 1)
		memset(__va(page_hpa), 0, PAGE_SIZE);
	else {
		u64 *pos;
		u64 *end;

		for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE;
		     pos != end; pos++) {
			u64 current_ent = *pos;

			*pos = 0;
			if (is_present_pte(current_ent))
				release_pt_page_64(vcpu,
						   current_ent &
						   PT64_BASE_ADDR_MASK,
						   level - 1);
		}
	}
	kvm_mmu_free_page(vcpu, page_hpa);
}

static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}

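/*
 * Install a shadow mapping from guest virtual address v to host physical
 * address p, allocating intermediate page table pages as needed.  Returns
 * -ENOMEM if the shadow page pool is empty.
 */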
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
{
	int level = PT32E_ROOT_LEVEL;
	hpa_t table_addr = vcpu->mmu.root_hpa;

	for (; ; level--) {
		u32 index = PT64_INDEX(v, level);
		u64 *table;

		ASSERT(VALID_PAGE(table_addr));
		table = __va(table_addr);

		if (level == 1) {
			mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
			page_header_update_slot(vcpu->kvm, table, v);
			table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
								PT_USER_MASK;
			return 0;
		}

		if (table[index] == 0) {
			hpa_t new_table = kvm_mmu_alloc_page(vcpu,
							     &table[index]);

			if (!VALID_PAGE(new_table)) {
				pgprintk("nonpaging_map: ENOMEM\n");
				return -ENOMEM;
			}

			if (level == PT32E_ROOT_LEVEL)
				table[index] = new_table | PT_PRESENT_MASK;
			else
				table[index] = new_table | PT_PRESENT_MASK |
						PT_WRITABLE_MASK | PT_USER_MASK;
		}
		table_addr = table[index] & PT64_BASE_ADDR_MASK;
	}
}

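/*
 * The shadow page pool is exhausted: tear down the whole shadow tree,
 * start over with a fresh root and flush the hardware tlb.
 */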
static void nonpaging_flush(struct kvm_vcpu *vcpu)
{
	hpa_t root = vcpu->mmu.root_hpa;

	++kvm_stat.tlb_flush;
	pgprintk("nonpaging_flush\n");
	ASSERT(VALID_PAGE(root));
	release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level);
	root = kvm_mmu_alloc_page(vcpu, NULL);
	ASSERT(VALID_PAGE(root));
	vcpu->mmu.root_hpa = root;
	if (is_paging(vcpu))
		root |= (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK));
	kvm_arch_ops->set_cr3(vcpu, root);
	kvm_arch_ops->tlb_flush(vcpu);
}

static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
{
	return vaddr;
}

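/*
 * Handle a fault while the guest has paging disabled: map the faulting
 * page 1:1 into the shadow page table, flushing and retrying if we run
 * out of shadow pages.  Returns 1 if the address has no backing memory
 * slot.
 */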
static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
				u32 error_code)
{
	int ret;
	gpa_t addr = gva;

	ASSERT(vcpu);
	ASSERT(VALID_PAGE(vcpu->mmu.root_hpa));

	for (;;) {
		hpa_t paddr;

		paddr = gpa_to_hpa(vcpu, addr & PT64_BASE_ADDR_MASK);

		if (is_error_hpa(paddr))
			return 1;

		ret = nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
		if (ret) {
			nonpaging_flush(vcpu);
			continue;
		}
		break;
	}
	return ret;
}

static void nonpaging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
{
}

static void nonpaging_free(struct kvm_vcpu *vcpu)
{
	hpa_t root;

	ASSERT(vcpu);
	root = vcpu->mmu.root_hpa;
	if (VALID_PAGE(root))
		release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level);
	vcpu->mmu.root_hpa = INVALID_PAGE;
}

static int nonpaging_init_context(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu *context = &vcpu->mmu;

	context->new_cr3 = nonpaging_new_cr3;
	context->page_fault = nonpaging_page_fault;
	context->inval_page = nonpaging_inval_page;
	context->gva_to_gpa = nonpaging_gva_to_gpa;
	context->free = nonpaging_free;
	context->root_level = PT32E_ROOT_LEVEL;
	context->shadow_root_level = PT32E_ROOT_LEVEL;
	context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
	ASSERT(VALID_PAGE(context->root_hpa));
	kvm_arch_ops->set_cr3(vcpu, context->root_hpa);
	return 0;
}

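/*
 * Zap all non-global shadow page tables, then flush the hardware tlb.
 */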
static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu_page *page, *npage;

	list_for_each_entry_safe(page, npage, &vcpu->kvm->active_mmu_pages,
				 link) {
		if (page->global)
			continue;

		if (!page->parent_pte)
			continue;

		*page->parent_pte = 0;
		release_pt_page_64(vcpu, page->page_hpa, 1);
	}
	++kvm_stat.tlb_flush;
	kvm_arch_ops->tlb_flush(vcpu);
}

static void paging_new_cr3(struct kvm_vcpu *vcpu)
{
	kvm_mmu_flush_tlb(vcpu);
}

static void mark_pagetable_nonglobal(void *shadow_pte)
{
	page_header(__pa(shadow_pte))->global = 0;
}

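/*
 * Fill in the parts of a shadow pte that are common to the 32-bit and
 * 64-bit guest page table formats: record the guest's access bits in
 * the shadow-only bit range, withhold write access from pages the guest
 * has not yet dirtied, and either point the pte at the host page or
 * mark it as an mmio pte when the gpa has no backing memory.
 */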
static inline void set_pte_common(struct kvm_vcpu *vcpu,
				  u64 *shadow_pte,
				  gpa_t gaddr,
				  int dirty,
				  u64 access_bits)
{
	hpa_t paddr;

	*shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET;
	if (!dirty)
		access_bits &= ~PT_WRITABLE_MASK;

	if (access_bits & PT_WRITABLE_MASK)
		mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);

	*shadow_pte |= access_bits;

	paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);

	if (!(*shadow_pte & PT_GLOBAL_MASK))
		mark_pagetable_nonglobal(shadow_pte);

	if (is_error_hpa(paddr)) {
		*shadow_pte |= gaddr;
		*shadow_pte |= PT_SHADOW_IO_MARK;
		*shadow_pte &= ~PT_PRESENT_MASK;
	} else {
		*shadow_pte |= paddr;
		page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
	}
}

static void inject_page_fault(struct kvm_vcpu *vcpu,
			      u64 addr,
			      u32 err_code)
{
	kvm_arch_ops->inject_page_fault(vcpu, addr, err_code);
}

static inline int fix_read_pf(u64 *shadow_ent)
{
	if ((*shadow_ent & PT_SHADOW_USER_MASK) &&
	    !(*shadow_ent & PT_USER_MASK)) {
		/*
		 * If supervisor write protect is disabled, we shadow kernel
		 * pages as user pages so we can trap the write access.
		 */
		*shadow_ent |= PT_USER_MASK;
		*shadow_ent &= ~PT_WRITABLE_MASK;

		return 1;
	}
	return 0;
}

static int may_access(u64 pte, int write, int user)
{
	if (user && !(pte & PT_USER_MASK))
		return 0;
	if (write && !(pte & PT_WRITABLE_MASK))
		return 0;
	return 1;
}

/*
 * Remove a shadow pte.
 */
static void paging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
{
	hpa_t page_addr = vcpu->mmu.root_hpa;
	int level = vcpu->mmu.shadow_root_level;

	++kvm_stat.invlpg;

	for (; ; level--) {
		u32 index = PT64_INDEX(addr, level);
		u64 *table = __va(page_addr);

		if (level == PT_PAGE_TABLE_LEVEL) {
			table[index] = 0;
			return;
		}

		if (!is_present_pte(table[index]))
			return;

		page_addr = table[index] & PT64_BASE_ADDR_MASK;

		if (level == PT_DIRECTORY_LEVEL &&
		    (table[index] & PT_SHADOW_PS_MARK)) {
			table[index] = 0;
			release_pt_page_64(vcpu, page_addr,
					   PT_PAGE_TABLE_LEVEL);

			kvm_arch_ops->tlb_flush(vcpu);
			return;
		}
	}
}

static void paging_free(struct kvm_vcpu *vcpu)
{
	nonpaging_free(vcpu);
}

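/*
 * Expand the paging_tmpl.h template twice to generate the 64-bit
 * (paging64_*) and 32-bit (paging32_*) guest page table walkers used
 * below.
 */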
#define PTTYPE 64
#include "paging_tmpl.h"
#undef PTTYPE

#define PTTYPE 32
#include "paging_tmpl.h"
#undef PTTYPE

static int paging64_init_context(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu *context = &vcpu->mmu;

	ASSERT(is_pae(vcpu));
	context->new_cr3 = paging_new_cr3;
	context->page_fault = paging64_page_fault;
	context->inval_page = paging_inval_page;
	context->gva_to_gpa = paging64_gva_to_gpa;
	context->free = paging_free;
	context->root_level = PT64_ROOT_LEVEL;
	context->shadow_root_level = PT64_ROOT_LEVEL;
	context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
	ASSERT(VALID_PAGE(context->root_hpa));
	kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
		    (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
	return 0;
}

static int paging32_init_context(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu *context = &vcpu->mmu;

	context->new_cr3 = paging_new_cr3;
	context->page_fault = paging32_page_fault;
	context->inval_page = paging_inval_page;
	context->gva_to_gpa = paging32_gva_to_gpa;
	context->free = paging_free;
	context->root_level = PT32_ROOT_LEVEL;
	context->shadow_root_level = PT32E_ROOT_LEVEL;
	context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
	ASSERT(VALID_PAGE(context->root_hpa));
	kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
		    (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
	return 0;
}

static int paging32E_init_context(struct kvm_vcpu *vcpu)
{
	int ret;

	if ((ret = paging64_init_context(vcpu)))
		return ret;

	vcpu->mmu.root_level = PT32E_ROOT_LEVEL;
	vcpu->mmu.shadow_root_level = PT32E_ROOT_LEVEL;
	return 0;
}

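/*
 * Pick the mmu context that matches the guest's current paging mode:
 * nonpaged, 64-bit, pae or plain 32-bit paging.
 */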
static int init_kvm_mmu(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);
	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));

	if (!is_paging(vcpu))
		return nonpaging_init_context(vcpu);
	else if (kvm_arch_ops->is_long_mode(vcpu))
		return paging64_init_context(vcpu);
	else if (is_pae(vcpu))
		return paging32E_init_context(vcpu);
	else
		return paging32_init_context(vcpu);
}

static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);
	if (VALID_PAGE(vcpu->mmu.root_hpa)) {
		vcpu->mmu.free(vcpu);
		vcpu->mmu.root_hpa = INVALID_PAGE;
	}
}

int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
{
	destroy_kvm_mmu(vcpu);
	return init_kvm_mmu(vcpu);
}

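/* Give the vcpu's preallocated shadow pages back to the kernel. */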
static void free_mmu_pages(struct kvm_vcpu *vcpu)
{
	while (!list_empty(&vcpu->free_pages)) {
		struct kvm_mmu_page *page;

		page = list_entry(vcpu->free_pages.next,
				  struct kvm_mmu_page, link);
		list_del(&page->link);
		__free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT));
		page->page_hpa = INVALID_PAGE;
	}
}

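/*
 * Preallocate the fixed pool of KVM_NUM_MMU_PAGES shadow pages for this
 * vcpu and put them on its free list.
 */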
static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
{
	int i;

	ASSERT(vcpu);

	for (i = 0; i < KVM_NUM_MMU_PAGES; i++) {
		struct page *page;
		struct kvm_mmu_page *page_header = &vcpu->page_header_buf[i];

		INIT_LIST_HEAD(&page_header->link);
		if ((page = alloc_page(GFP_KVM_MMU)) == NULL)
			goto error_1;
		page->private = (unsigned long)page_header;
		page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
		memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
		list_add(&page_header->link, &vcpu->free_pages);
	}
	return 0;

error_1:
	free_mmu_pages(vcpu);
	return -ENOMEM;
}

int kvm_mmu_init(struct kvm_vcpu *vcpu)
{
	int r;

	ASSERT(vcpu);
	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
	ASSERT(list_empty(&vcpu->free_pages));

	if ((r = alloc_mmu_pages(vcpu)))
		return r;

	if ((r = init_kvm_mmu(vcpu))) {
		free_mmu_pages(vcpu);
		return r;
	}
	return 0;
}

void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);

	destroy_kvm_mmu(vcpu);
	free_mmu_pages(vcpu);
}

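/*
 * Write protect every shadow pte that maps a page belonging to the given
 * memory slot.
 */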
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
{
	struct kvm_mmu_page *page;

	list_for_each_entry(page, &kvm->active_mmu_pages, link) {
		int i;
		u64 *pt;

		if (!test_bit(slot, &page->slot_bitmap))
			continue;

		pt = __va(page->page_hpa);
		for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
			/* avoid RMW */
			if (pt[i] & PT_WRITABLE_MASK)
				pt[i] &= ~PT_WRITABLE_MASK;
	}
}