/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include "kvm.h"
#include "x86.h"
#include "x86_emulate.h"
#include "irq.h"

#include <linux/kvm.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/vmalloc.h>
#include <linux/reboot.h>
#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/file.h>
#include <linux/sysdev.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/anon_inodes.h>
#include <linux/profile.h>
#include <linux/kvm_para.h>
#include <linux/pagemap.h>
#include <linux/mman.h>

#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/desc.h>

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

static DEFINE_SPINLOCK(kvm_lock);
static LIST_HEAD(vm_list);

static cpumask_t cpus_hardware_enabled;

struct kvm_x86_ops *kvm_x86_ops;
struct kmem_cache *kvm_vcpu_cache;
EXPORT_SYMBOL_GPL(kvm_vcpu_cache);

static __read_mostly struct preempt_ops kvm_preempt_ops;

#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)

static struct kvm_stats_debugfs_item {
	const char *name;
	int offset;
	struct dentry *dentry;
} debugfs_entries[] = {
	{ "pf_fixed", STAT_OFFSET(pf_fixed) },
	{ "pf_guest", STAT_OFFSET(pf_guest) },
	{ "tlb_flush", STAT_OFFSET(tlb_flush) },
	{ "invlpg", STAT_OFFSET(invlpg) },
	{ "exits", STAT_OFFSET(exits) },
	{ "io_exits", STAT_OFFSET(io_exits) },
	{ "mmio_exits", STAT_OFFSET(mmio_exits) },
	{ "signal_exits", STAT_OFFSET(signal_exits) },
	{ "irq_window", STAT_OFFSET(irq_window_exits) },
	{ "halt_exits", STAT_OFFSET(halt_exits) },
	{ "halt_wakeup", STAT_OFFSET(halt_wakeup) },
	{ "request_irq", STAT_OFFSET(request_irq_exits) },
	{ "irq_exits", STAT_OFFSET(irq_exits) },
	{ "light_exits", STAT_OFFSET(light_exits) },
	{ "efer_reload", STAT_OFFSET(efer_reload) },
	{ NULL }
};

static struct dentry *debugfs_dir;

static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
			   unsigned long arg);

static inline int valid_vcpu(int n)
{
	return likely(n >= 0 && n < KVM_MAX_VCPUS);
}

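/*
 * Lazily swap in the guest FPU state, saving the host image first.  This is
 * a no-op if the guest FPU is inactive or already loaded; kvm_put_guest_fpu()
 * below restores the host image.
 */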
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
	if (!vcpu->fpu_active || vcpu->guest_fpu_loaded)
		return;

	vcpu->guest_fpu_loaded = 1;
	fx_save(&vcpu->host_fx_image);
	fx_restore(&vcpu->guest_fx_image);
}
EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);

void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
	if (!vcpu->guest_fpu_loaded)
		return;

	vcpu->guest_fpu_loaded = 0;
	fx_save(&vcpu->guest_fx_image);
	fx_restore(&vcpu->host_fx_image);
}
EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);

/*
 * Switches to specified vcpu, until a matching vcpu_put()
 */
void vcpu_load(struct kvm_vcpu *vcpu)
{
	int cpu;

	mutex_lock(&vcpu->mutex);
	cpu = get_cpu();
	preempt_notifier_register(&vcpu->preempt_notifier);
	kvm_arch_vcpu_load(vcpu, cpu);
	put_cpu();
}

void vcpu_put(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	kvm_arch_vcpu_put(vcpu);
	preempt_notifier_unregister(&vcpu->preempt_notifier);
	preempt_enable();
	mutex_unlock(&vcpu->mutex);
}

static void ack_flush(void *_completed)
{
}

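/*
 * Mark every vcpu as needing a TLB flush and send an IPI to the physical
 * CPUs currently running one of them, waiting for the IPIs to be handled.
 */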
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
	int i, cpu;
	cpumask_t cpus;
	struct kvm_vcpu *vcpu;

	cpus_clear(cpus);
	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
		vcpu = kvm->vcpus[i];
		if (!vcpu)
			continue;
		if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
			continue;
		cpu = vcpu->cpu;
		if (cpu != -1 && cpu != raw_smp_processor_id())
			cpu_set(cpu, cpus);
	}
	smp_call_function_mask(cpus, ack_flush, NULL, 1);
}

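/*
 * Common vcpu setup: the shared run/pio pages, MMU state and, when the
 * irqchip is emulated in the kernel, the local APIC.
 */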
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
	struct page *page;
	int r;

	mutex_init(&vcpu->mutex);
	vcpu->cpu = -1;
	vcpu->mmu.root_hpa = INVALID_PAGE;
	vcpu->kvm = kvm;
	vcpu->vcpu_id = id;
	if (!irqchip_in_kernel(kvm) || id == 0)
		vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
	else
		vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED;
	init_waitqueue_head(&vcpu->wq);

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page) {
		r = -ENOMEM;
		goto fail;
	}
	vcpu->run = page_address(page);

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page) {
		r = -ENOMEM;
		goto fail_free_run;
	}
	vcpu->pio_data = page_address(page);

	r = kvm_mmu_create(vcpu);
	if (r < 0)
		goto fail_free_pio_data;

	if (irqchip_in_kernel(kvm)) {
		r = kvm_create_lapic(vcpu);
		if (r < 0)
			goto fail_mmu_destroy;
	}

	return 0;

fail_mmu_destroy:
	kvm_mmu_destroy(vcpu);
fail_free_pio_data:
	free_page((unsigned long)vcpu->pio_data);
fail_free_run:
	free_page((unsigned long)vcpu->run);
fail:
	return r;
}
EXPORT_SYMBOL_GPL(kvm_vcpu_init);

void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	kvm_free_lapic(vcpu);
	kvm_mmu_destroy(vcpu);
	free_page((unsigned long)vcpu->pio_data);
	free_page((unsigned long)vcpu->run);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);

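/*
 * Allocate and initialize a new VM: its PIO and MMIO buses, lock and MMU
 * page list, then add it to the global vm_list.
 */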
static struct kvm *kvm_create_vm(void)
{
	struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);

	if (!kvm)
		return ERR_PTR(-ENOMEM);

	kvm_io_bus_init(&kvm->pio_bus);
	mutex_init(&kvm->lock);
	INIT_LIST_HEAD(&kvm->active_mmu_pages);
	kvm_io_bus_init(&kvm->mmio_bus);
	spin_lock(&kvm_lock);
	list_add(&kvm->vm_list, &vm_list);
	spin_unlock(&kvm_lock);
	return kvm;
}

/*
 * Free any memory in @free but not in @dont.
 */
static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
				  struct kvm_memory_slot *dont)
{
	if (!dont || free->rmap != dont->rmap)
		vfree(free->rmap);

	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
		vfree(free->dirty_bitmap);

	free->npages = 0;
	free->dirty_bitmap = NULL;
	free->rmap = NULL;
}

static void kvm_free_physmem(struct kvm *kvm)
{
	int i;

	for (i = 0; i < kvm->nmemslots; ++i)
		kvm_free_physmem_slot(&kvm->memslots[i], NULL);
}

static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i)
		if (vcpu->pio.guest_pages[i]) {
			kvm_release_page(vcpu->pio.guest_pages[i]);
			vcpu->pio.guest_pages[i] = NULL;
		}
}

static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
	vcpu_load(vcpu);
	kvm_mmu_unload(vcpu);
	vcpu_put(vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;

	/*
	 * Unpin any mmu pages first.
	 */
	for (i = 0; i < KVM_MAX_VCPUS; ++i)
		if (kvm->vcpus[i])
			kvm_unload_vcpu_mmu(kvm->vcpus[i]);
	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
		if (kvm->vcpus[i]) {
			kvm_x86_ops->vcpu_free(kvm->vcpus[i]);
			kvm->vcpus[i] = NULL;
		}
	}

}

static void kvm_destroy_vm(struct kvm *kvm)
{
	spin_lock(&kvm_lock);
	list_del(&kvm->vm_list);
	spin_unlock(&kvm_lock);
	kvm_io_bus_destroy(&kvm->pio_bus);
	kvm_io_bus_destroy(&kvm->mmio_bus);
	kfree(kvm->vpic);
	kfree(kvm->vioapic);
	kvm_free_vcpus(kvm);
	kvm_free_physmem(kvm);
	kfree(kvm);
}

static int kvm_vm_release(struct inode *inode, struct file *filp)
{
	struct kvm *kvm = filp->private_data;

	kvm_destroy_vm(kvm);
	return 0;
}

static void inject_gp(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops->inject_gp(vcpu, 0);
}

void fx_init(struct kvm_vcpu *vcpu)
{
	unsigned after_mxcsr_mask;

	/* Initialize guest FPU by resetting ours and saving into guest's */
	preempt_disable();
	fx_save(&vcpu->host_fx_image);
	fpu_init();
	fx_save(&vcpu->guest_fx_image);
	fx_restore(&vcpu->host_fx_image);
	preempt_enable();

	vcpu->cr0 |= X86_CR0_ET;
	after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
	vcpu->guest_fx_image.mxcsr = 0x1f80;
	memset((void *)&vcpu->guest_fx_image + after_mxcsr_mask,
	       0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
}
EXPORT_SYMBOL_GPL(fx_init);

/*
 * Allocate some memory and give it an address in the guest physical address
 * space.
 *
 * Discontiguous memory is allowed, mostly for framebuffers.
 *
 * Must be called holding kvm->lock.
 */
int __kvm_set_memory_region(struct kvm *kvm,
			    struct kvm_userspace_memory_region *mem,
			    int user_alloc)
{
	int r;
	gfn_t base_gfn;
	unsigned long npages;
	unsigned long i;
	struct kvm_memory_slot *memslot;
	struct kvm_memory_slot old, new;

	r = -EINVAL;
	/* General sanity checks */
	if (mem->memory_size & (PAGE_SIZE - 1))
		goto out;
	if (mem->guest_phys_addr & (PAGE_SIZE - 1))
		goto out;
	if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
		goto out;
	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
		goto out;

	memslot = &kvm->memslots[mem->slot];
	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
	npages = mem->memory_size >> PAGE_SHIFT;

	if (!npages)
		mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;

	new = old = *memslot;

	new.base_gfn = base_gfn;
	new.npages = npages;
	new.flags = mem->flags;

	/* Disallow changing a memory slot's size. */
	r = -EINVAL;
	if (npages && old.npages && npages != old.npages)
		goto out_free;

	/* Check for overlaps */
	r = -EEXIST;
	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
		struct kvm_memory_slot *s = &kvm->memslots[i];

		if (s == memslot)
			continue;
		if (!((base_gfn + npages <= s->base_gfn) ||
		      (base_gfn >= s->base_gfn + s->npages)))
			goto out_free;
	}

	/* Free page dirty bitmap if unneeded */
	if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
		new.dirty_bitmap = NULL;

	r = -ENOMEM;

	/* Allocate if a slot is being created */
	if (npages && !new.rmap) {
		new.rmap = vmalloc(npages * sizeof(struct page *));

		if (!new.rmap)
			goto out_free;

		memset(new.rmap, 0, npages * sizeof(*new.rmap));

		new.user_alloc = user_alloc;
		if (user_alloc)
			new.userspace_addr = mem->userspace_addr;
		else {
			down_write(&current->mm->mmap_sem);
			new.userspace_addr = do_mmap(NULL, 0,
						     npages * PAGE_SIZE,
						     PROT_READ | PROT_WRITE,
						     MAP_SHARED | MAP_ANONYMOUS,
						     0);
			up_write(&current->mm->mmap_sem);

			if (IS_ERR((void *)new.userspace_addr))
				goto out_free;
		}
	} else {
		if (!old.user_alloc && old.rmap) {
			int ret;

			down_write(&current->mm->mmap_sem);
			ret = do_munmap(current->mm, old.userspace_addr,
					old.npages * PAGE_SIZE);
			up_write(&current->mm->mmap_sem);
			if (ret < 0)
				printk(KERN_WARNING
				       "kvm_vm_ioctl_set_memory_region: "
				       "failed to munmap memory\n");
		}
	}

	/* Allocate page dirty bitmap if needed */
	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
		unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;

		new.dirty_bitmap = vmalloc(dirty_bytes);
		if (!new.dirty_bitmap)
			goto out_free;
		memset(new.dirty_bitmap, 0, dirty_bytes);
	}

	if (mem->slot >= kvm->nmemslots)
		kvm->nmemslots = mem->slot + 1;

	if (!kvm->n_requested_mmu_pages) {
		unsigned int n_pages;

		if (npages) {
			n_pages = npages * KVM_PERMILLE_MMU_PAGES / 1000;
			kvm_mmu_change_mmu_pages(kvm, kvm->n_alloc_mmu_pages +
						 n_pages);
		} else {
			unsigned int nr_mmu_pages;

			n_pages = old.npages * KVM_PERMILLE_MMU_PAGES / 1000;
			nr_mmu_pages = kvm->n_alloc_mmu_pages - n_pages;
			nr_mmu_pages = max(nr_mmu_pages,
					   (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
			kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
		}
	}

	*memslot = new;

	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
	kvm_flush_remote_tlbs(kvm);

	kvm_free_physmem_slot(&old, &new);
	return 0;

out_free:
	kvm_free_physmem_slot(&new, &old);
out:
	return r;

}
EXPORT_SYMBOL_GPL(__kvm_set_memory_region);

int kvm_set_memory_region(struct kvm *kvm,
			  struct kvm_userspace_memory_region *mem,
			  int user_alloc)
{
	int r;

	mutex_lock(&kvm->lock);
	r = __kvm_set_memory_region(kvm, mem, user_alloc);
	mutex_unlock(&kvm->lock);
	return r;
}
EXPORT_SYMBOL_GPL(kvm_set_memory_region);

int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
				   struct
				   kvm_userspace_memory_region *mem,
				   int user_alloc)
{
	if (mem->slot >= KVM_MEMORY_SLOTS)
		return -EINVAL;
	return kvm_set_memory_region(kvm, mem, user_alloc);
}

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
				      struct kvm_dirty_log *log)
{
	struct kvm_memory_slot *memslot;
	int r, i;
	int n;
	unsigned long any = 0;

	mutex_lock(&kvm->lock);

	r = -EINVAL;
	if (log->slot >= KVM_MEMORY_SLOTS)
		goto out;

	memslot = &kvm->memslots[log->slot];
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;

	for (i = 0; !any && i < n/sizeof(long); ++i)
		any = memslot->dirty_bitmap[i];

	r = -EFAULT;
	if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
		goto out;

	/* If nothing is dirty, don't bother messing with page tables. */
	if (any) {
		kvm_mmu_slot_remove_write_access(kvm, log->slot);
		kvm_flush_remote_tlbs(kvm);
		memset(memslot->dirty_bitmap, 0, n);
	}

	r = 0;

out:
	mutex_unlock(&kvm->lock);
	return r;
}

int is_error_page(struct page *page)
{
	return page == bad_page;
}
EXPORT_SYMBOL_GPL(is_error_page);

gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
{
	int i;
	struct kvm_mem_alias *alias;

	for (i = 0; i < kvm->naliases; ++i) {
		alias = &kvm->aliases[i];
		if (gfn >= alias->base_gfn
		    && gfn < alias->base_gfn + alias->npages)
			return alias->target_gfn + gfn - alias->base_gfn;
	}
	return gfn;
}

static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
	int i;

	for (i = 0; i < kvm->nmemslots; ++i) {
		struct kvm_memory_slot *memslot = &kvm->memslots[i];

		if (gfn >= memslot->base_gfn
		    && gfn < memslot->base_gfn + memslot->npages)
			return memslot;
	}
	return NULL;
}

struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
	gfn = unalias_gfn(kvm, gfn);
	return __gfn_to_memslot(kvm, gfn);
}

int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
	int i;

	gfn = unalias_gfn(kvm, gfn);
	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
		struct kvm_memory_slot *memslot = &kvm->memslots[i];

		if (gfn >= memslot->base_gfn
		    && gfn < memslot->base_gfn + memslot->npages)
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);

/*
 * Requires current->mm->mmap_sem to be held
 */
static struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memory_slot *slot;
	struct page *page[1];
	int npages;

	might_sleep();

	gfn = unalias_gfn(kvm, gfn);
	slot = __gfn_to_memslot(kvm, gfn);
	if (!slot) {
		get_page(bad_page);
		return bad_page;
	}

	npages = get_user_pages(current, current->mm,
				slot->userspace_addr
				+ (gfn - slot->base_gfn) * PAGE_SIZE, 1,
				1, 1, page, NULL);
	if (npages != 1) {
		get_page(bad_page);
		return bad_page;
	}

	return page[0];
}

struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
	struct page *page;

	down_read(&current->mm->mmap_sem);
	page = __gfn_to_page(kvm, gfn);
	up_read(&current->mm->mmap_sem);

	return page;
}
EXPORT_SYMBOL_GPL(gfn_to_page);

void kvm_release_page(struct page *page)
{
	if (!PageReserved(page))
		SetPageDirty(page);
	put_page(page);
}
EXPORT_SYMBOL_GPL(kvm_release_page);

static int next_segment(unsigned long len, int offset)
{
	if (len > PAGE_SIZE - offset)
		return PAGE_SIZE - offset;
	else
		return len;
}

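/*
 * Single-page guest memory accessors.  The kvm_read_guest()/kvm_write_guest()
 * wrappers below split arbitrary ranges at page boundaries using
 * next_segment() above.
 */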
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
			int len)
{
	void *page_virt;
	struct page *page;

	page = gfn_to_page(kvm, gfn);
	if (is_error_page(page)) {
		kvm_release_page(page);
		return -EFAULT;
	}
	page_virt = kmap_atomic(page, KM_USER0);

	memcpy(data, page_virt + offset, len);

	kunmap_atomic(page_virt, KM_USER0);
	kvm_release_page(page);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_read_guest_page);

int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
{
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int seg;
	int offset = offset_in_page(gpa);
	int ret;

	while ((seg = next_segment(len, offset)) != 0) {
		ret = kvm_read_guest_page(kvm, gfn, data, offset, seg);
		if (ret < 0)
			return ret;
		offset = 0;
		len -= seg;
		data += seg;
		++gfn;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_read_guest);

int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
			 int offset, int len)
{
	void *page_virt;
	struct page *page;

	page = gfn_to_page(kvm, gfn);
	if (is_error_page(page)) {
		kvm_release_page(page);
		return -EFAULT;
	}
	page_virt = kmap_atomic(page, KM_USER0);

	memcpy(page_virt + offset, data, len);

	kunmap_atomic(page_virt, KM_USER0);
	mark_page_dirty(kvm, gfn);
	kvm_release_page(page);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_write_guest_page);

int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
		    unsigned long len)
{
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int seg;
	int offset = offset_in_page(gpa);
	int ret;

	while ((seg = next_segment(len, offset)) != 0) {
		ret = kvm_write_guest_page(kvm, gfn, data, offset, seg);
		if (ret < 0)
			return ret;
		offset = 0;
		len -= seg;
		data += seg;
		++gfn;
	}
	return 0;
}

int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
{
	void *page_virt;
	struct page *page;

	page = gfn_to_page(kvm, gfn);
	if (is_error_page(page)) {
		kvm_release_page(page);
		return -EFAULT;
	}
	page_virt = kmap_atomic(page, KM_USER0);

	memset(page_virt + offset, 0, len);

	kunmap_atomic(page_virt, KM_USER0);
	kvm_release_page(page);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_clear_guest_page);

int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
{
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int seg;
	int offset = offset_in_page(gpa);
	int ret;

	while ((seg = next_segment(len, offset)) != 0) {
		ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
		if (ret < 0)
			return ret;
		offset = 0;
		len -= seg;
		++gfn;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_clear_guest);

/* WARNING: Does not work on aliased pages. */
void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memory_slot *memslot;

	memslot = __gfn_to_memslot(kvm, gfn);
	if (memslot && memslot->dirty_bitmap) {
		unsigned long rel_gfn = gfn - memslot->base_gfn;

		/* avoid RMW */
		if (!test_bit(rel_gfn, memslot->dirty_bitmap))
			set_bit(rel_gfn, memslot->dirty_bitmap);
	}
}

static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
					       gpa_t addr)
{
	return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr);
}

/*
 * The vCPU has executed a HLT instruction with in-kernel mode enabled.
 */
static void kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&vcpu->wq, &wait);

	/*
	 * We will block until either an interrupt or a signal wakes us up
	 */
	while (!kvm_cpu_has_interrupt(vcpu)
	       && !signal_pending(current)
	       && vcpu->mp_state != VCPU_MP_STATE_RUNNABLE
	       && vcpu->mp_state != VCPU_MP_STATE_SIPI_RECEIVED) {
		set_current_state(TASK_INTERRUPTIBLE);
		vcpu_put(vcpu);
		schedule();
		vcpu_load(vcpu);
	}

	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&vcpu->wq, &wait);
}

int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
	++vcpu->stat.halt_exits;
	if (irqchip_in_kernel(vcpu->kvm)) {
		vcpu->mp_state = VCPU_MP_STATE_HALTED;
		kvm_vcpu_block(vcpu);
		if (vcpu->mp_state != VCPU_MP_STATE_RUNNABLE)
			return -EINTR;
		return 1;
	} else {
		vcpu->run->exit_reason = KVM_EXIT_HLT;
		return 0;
	}
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);

int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
	unsigned long nr, a0, a1, a2, a3, ret;

	kvm_x86_ops->cache_regs(vcpu);

	nr = vcpu->regs[VCPU_REGS_RAX];
	a0 = vcpu->regs[VCPU_REGS_RBX];
	a1 = vcpu->regs[VCPU_REGS_RCX];
	a2 = vcpu->regs[VCPU_REGS_RDX];
	a3 = vcpu->regs[VCPU_REGS_RSI];

	if (!is_long_mode(vcpu)) {
		nr &= 0xFFFFFFFF;
		a0 &= 0xFFFFFFFF;
		a1 &= 0xFFFFFFFF;
		a2 &= 0xFFFFFFFF;
		a3 &= 0xFFFFFFFF;
	}

	switch (nr) {
	default:
		ret = -KVM_ENOSYS;
		break;
	}
	vcpu->regs[VCPU_REGS_RAX] = ret;
	kvm_x86_ops->decache_regs(vcpu);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);

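/*
 * Rewrite the hypercall instruction at the current rip with the sequence
 * expected by the underlying vendor module (via ->patch_hypercall).
 */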
int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
{
	char instruction[3];
	int ret = 0;

	mutex_lock(&vcpu->kvm->lock);

	/*
	 * Blow out the MMU to ensure that no other VCPU has an active mapping
	 * to ensure that the updated hypercall appears atomically across all
	 * VCPUs.
	 */
	kvm_mmu_zap_all(vcpu->kvm);

	kvm_x86_ops->cache_regs(vcpu);
	kvm_x86_ops->patch_hypercall(vcpu, instruction);
	if (emulator_write_emulated(vcpu->rip, instruction, 3, vcpu)
	    != X86EMUL_CONTINUE)
		ret = -EFAULT;

	mutex_unlock(&vcpu->kvm->lock);

	return ret;
}

static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
	return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
}

void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
	struct descriptor_table dt = { limit, base };

	kvm_x86_ops->set_gdt(vcpu, &dt);
}

void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
	struct descriptor_table dt = { limit, base };

	kvm_x86_ops->set_idt(vcpu, &dt);
}

void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
		   unsigned long *rflags)
{
	lmsw(vcpu, msw);
	*rflags = kvm_x86_ops->get_rflags(vcpu);
}

unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
{
	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
	switch (cr) {
	case 0:
		return vcpu->cr0;
	case 2:
		return vcpu->cr2;
	case 3:
		return vcpu->cr3;
	case 4:
		return vcpu->cr4;
	default:
		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
		return 0;
	}
}

void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
		     unsigned long *rflags)
{
	switch (cr) {
	case 0:
		set_cr0(vcpu, mk_cr_64(vcpu->cr0, val));
		*rflags = kvm_x86_ops->get_rflags(vcpu);
		break;
	case 2:
		vcpu->cr2 = val;
		break;
	case 3:
		set_cr3(vcpu, val);
		break;
	case 4:
		set_cr4(vcpu, mk_cr_64(vcpu->cr4, val));
		break;
	default:
		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
	}
}

void kvm_resched(struct kvm_vcpu *vcpu)
{
	if (!need_resched())
		return;
	cond_resched();
}
EXPORT_SYMBOL_GPL(kvm_resched);

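/*
 * Emulate CPUID from the cpuid entries supplied by userspace: prefer an
 * exact function match, otherwise fall back to the highest entry in the
 * same basic/extended range.
 */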
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
	int i;
	u32 function;
	struct kvm_cpuid_entry *e, *best;

	kvm_x86_ops->cache_regs(vcpu);
	function = vcpu->regs[VCPU_REGS_RAX];
	vcpu->regs[VCPU_REGS_RAX] = 0;
	vcpu->regs[VCPU_REGS_RBX] = 0;
	vcpu->regs[VCPU_REGS_RCX] = 0;
	vcpu->regs[VCPU_REGS_RDX] = 0;
	best = NULL;
	for (i = 0; i < vcpu->cpuid_nent; ++i) {
		e = &vcpu->cpuid_entries[i];
		if (e->function == function) {
			best = e;
			break;
		}
		/*
		 * Both basic or both extended?
		 */
		if (((e->function ^ function) & 0x80000000) == 0)
			if (!best || e->function > best->function)
				best = e;
	}
	if (best) {
		vcpu->regs[VCPU_REGS_RAX] = best->eax;
		vcpu->regs[VCPU_REGS_RBX] = best->ebx;
		vcpu->regs[VCPU_REGS_RCX] = best->ecx;
		vcpu->regs[VCPU_REGS_RDX] = best->edx;
	}
	kvm_x86_ops->decache_regs(vcpu);
	kvm_x86_ops->skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);

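/*
 * Copy string PIO data between the vcpu's pio_data page and the pinned
 * guest pages, releasing the guest pages when done.
 */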
static int pio_copy_data(struct kvm_vcpu *vcpu)
{
	void *p = vcpu->pio_data;
	void *q;
	unsigned bytes;
	int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;

	q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
		 PAGE_KERNEL);
	if (!q) {
		free_pio_guest_pages(vcpu);
		return -ENOMEM;
	}
	q += vcpu->pio.guest_page_offset;
	bytes = vcpu->pio.size * vcpu->pio.cur_count;
	if (vcpu->pio.in)
		memcpy(q, p, bytes);
	else
		memcpy(p, q, bytes);
	q -= vcpu->pio.guest_page_offset;
	vunmap(q);
	free_pio_guest_pages(vcpu);
	return 0;
}

static int complete_pio(struct kvm_vcpu *vcpu)
{
	struct kvm_pio_request *io = &vcpu->pio;
	long delta;
	int r;

	kvm_x86_ops->cache_regs(vcpu);

	if (!io->string) {
		if (io->in)
			memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data,
			       io->size);
	} else {
		if (io->in) {
			r = pio_copy_data(vcpu);
			if (r) {
				kvm_x86_ops->cache_regs(vcpu);
				return r;
			}
		}

		delta = 1;
		if (io->rep) {
			delta *= io->cur_count;
			/*
			 * The size of the register should really depend on
			 * current address size.
			 */
			vcpu->regs[VCPU_REGS_RCX] -= delta;
		}
		if (io->down)
			delta = -delta;
		delta *= io->size;
		if (io->in)
			vcpu->regs[VCPU_REGS_RDI] += delta;
		else
			vcpu->regs[VCPU_REGS_RSI] += delta;
	}

	kvm_x86_ops->decache_regs(vcpu);

	io->count -= io->cur_count;
	io->cur_count = 0;

	return 0;
}

static void kernel_pio(struct kvm_io_device *pio_dev,
		       struct kvm_vcpu *vcpu,
		       void *pd)
{
	/* TODO: String I/O for in kernel device */

	mutex_lock(&vcpu->kvm->lock);
	if (vcpu->pio.in)
		kvm_iodevice_read(pio_dev, vcpu->pio.port,
				  vcpu->pio.size,
				  pd);
	else
		kvm_iodevice_write(pio_dev, vcpu->pio.port,
				   vcpu->pio.size,
				   pd);
	mutex_unlock(&vcpu->kvm->lock);
}

static void pio_string_write(struct kvm_io_device *pio_dev,
			     struct kvm_vcpu *vcpu)
{
	struct kvm_pio_request *io = &vcpu->pio;
	void *pd = vcpu->pio_data;
	int i;

	mutex_lock(&vcpu->kvm->lock);
	for (i = 0; i < io->cur_count; i++) {
		kvm_iodevice_write(pio_dev, io->port,
				   io->size,
				   pd);
		pd += io->size;
	}
	mutex_unlock(&vcpu->kvm->lock);
}

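/*
 * Emulate a single (non-string) port I/O operation.  Returns 1 if an
 * in-kernel device handled it, 0 if we must exit to userspace.
 */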
int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
		    int size, unsigned port)
{
	struct kvm_io_device *pio_dev;

	vcpu->run->exit_reason = KVM_EXIT_IO;
	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
	vcpu->run->io.size = vcpu->pio.size = size;
	vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
	vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = 1;
	vcpu->run->io.port = vcpu->pio.port = port;
	vcpu->pio.in = in;
	vcpu->pio.string = 0;
	vcpu->pio.down = 0;
	vcpu->pio.guest_page_offset = 0;
	vcpu->pio.rep = 0;

	kvm_x86_ops->cache_regs(vcpu);
	memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
	kvm_x86_ops->decache_regs(vcpu);

	kvm_x86_ops->skip_emulated_instruction(vcpu);

	pio_dev = vcpu_find_pio_dev(vcpu, port);
	if (pio_dev) {
		kernel_pio(pio_dev, vcpu, vcpu->pio_data);
		complete_pio(vcpu);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio);

int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
			   int size, unsigned long count, int down,
			   gva_t address, int rep, unsigned port)
{
	unsigned now, in_page;
	int i, ret = 0;
	int nr_pages = 1;
	struct page *page;
	struct kvm_io_device *pio_dev;

	vcpu->run->exit_reason = KVM_EXIT_IO;
	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
	vcpu->run->io.size = vcpu->pio.size = size;
	vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
	vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = count;
	vcpu->run->io.port = vcpu->pio.port = port;
	vcpu->pio.in = in;
	vcpu->pio.string = 1;
	vcpu->pio.down = down;
	vcpu->pio.guest_page_offset = offset_in_page(address);
	vcpu->pio.rep = rep;

	if (!count) {
		kvm_x86_ops->skip_emulated_instruction(vcpu);
		return 1;
	}

	if (!down)
		in_page = PAGE_SIZE - offset_in_page(address);
	else
		in_page = offset_in_page(address) + size;
	now = min(count, (unsigned long)in_page / size);
	if (!now) {
		/*
		 * String I/O straddles page boundary.  Pin two guest pages
		 * so that we satisfy atomicity constraints.  Do just one
		 * transaction to avoid complexity.
		 */
		nr_pages = 2;
		now = 1;
	}
	if (down) {
		/*
		 * String I/O in reverse.  Yuck.  Kill the guest, fix later.
		 */
		pr_unimpl(vcpu, "guest string pio down\n");
		inject_gp(vcpu);
		return 1;
	}
	vcpu->run->io.count = now;
	vcpu->pio.cur_count = now;

	if (vcpu->pio.cur_count == vcpu->pio.count)
		kvm_x86_ops->skip_emulated_instruction(vcpu);

	for (i = 0; i < nr_pages; ++i) {
		mutex_lock(&vcpu->kvm->lock);
		page = gva_to_page(vcpu, address + i * PAGE_SIZE);
		vcpu->pio.guest_pages[i] = page;
		mutex_unlock(&vcpu->kvm->lock);
		if (!page) {
			inject_gp(vcpu);
			free_pio_guest_pages(vcpu);
			return 1;
		}
	}

	pio_dev = vcpu_find_pio_dev(vcpu, port);
	if (!vcpu->pio.in) {
		/* string PIO write */
		ret = pio_copy_data(vcpu);
		if (ret >= 0 && pio_dev) {
			pio_string_write(pio_dev, vcpu);
			complete_pio(vcpu);
			if (vcpu->pio.count == 0)
				ret = 1;
		}
	} else if (pio_dev)
		pr_unimpl(vcpu, "no string pio read support yet, "
			  "port %x size %d count %ld\n",
			  port, size, count);

	return ret;
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);

/*
 * Check if userspace requested an interrupt window, and that the
 * interrupt window is open.
 *
 * No need to exit to userspace if we already have an interrupt queued.
 */
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
					struct kvm_run *kvm_run)
{
	return (!vcpu->irq_summary &&
		kvm_run->request_interrupt_window &&
		vcpu->interrupt_window_open &&
		(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
}

static void post_kvm_run_save(struct kvm_vcpu *vcpu,
			      struct kvm_run *kvm_run)
{
	kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
	kvm_run->cr8 = get_cr8(vcpu);
	kvm_run->apic_base = kvm_get_apic_base(vcpu);
	if (irqchip_in_kernel(vcpu->kvm))
		kvm_run->ready_for_interrupt_injection = 1;
	else
		kvm_run->ready_for_interrupt_injection =
					(vcpu->interrupt_window_open &&
					 vcpu->irq_summary == 0);
}

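/*
 * The vcpu execution loop: inject pending interrupts, enter the guest,
 * handle the exit, and loop until the exit must be delivered to userspace.
 */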
static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int r;

	if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
		pr_debug("vcpu %d received sipi with vector # %x\n",
			 vcpu->vcpu_id, vcpu->sipi_vector);
		kvm_lapic_reset(vcpu);
		r = kvm_x86_ops->vcpu_reset(vcpu);
		if (r)
			return r;
		vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
	}

preempted:
	if (vcpu->guest_debug.enabled)
		kvm_x86_ops->guest_debug_pre(vcpu);

again:
	r = kvm_mmu_reload(vcpu);
	if (unlikely(r))
		goto out;

	kvm_inject_pending_timer_irqs(vcpu);

	preempt_disable();

	kvm_x86_ops->prepare_guest_switch(vcpu);
	kvm_load_guest_fpu(vcpu);

	local_irq_disable();

	if (signal_pending(current)) {
		local_irq_enable();
		preempt_enable();
		r = -EINTR;
		kvm_run->exit_reason = KVM_EXIT_INTR;
		++vcpu->stat.signal_exits;
		goto out;
	}

	if (irqchip_in_kernel(vcpu->kvm))
		kvm_x86_ops->inject_pending_irq(vcpu);
	else if (!vcpu->mmio_read_completed)
		kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);

	vcpu->guest_mode = 1;
	kvm_guest_enter();

	if (vcpu->requests)
		if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
			kvm_x86_ops->tlb_flush(vcpu);

	kvm_x86_ops->run(vcpu, kvm_run);

	vcpu->guest_mode = 0;
	local_irq_enable();

	++vcpu->stat.exits;

	/*
	 * We must have an instruction between local_irq_enable() and
	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
	 * the interrupt shadow.  The stat.exits increment will do nicely.
	 * But we need to prevent reordering, hence this barrier():
	 */
	barrier();

	kvm_guest_exit();

	preempt_enable();

	/*
	 * Profile KVM exit RIPs:
	 */
	if (unlikely(prof_on == KVM_PROFILING)) {
		kvm_x86_ops->cache_regs(vcpu);
		profile_hit(KVM_PROFILING, (void *)vcpu->rip);
	}

	r = kvm_x86_ops->handle_exit(kvm_run, vcpu);

	if (r > 0) {
		if (dm_request_for_irq_injection(vcpu, kvm_run)) {
			r = -EINTR;
			kvm_run->exit_reason = KVM_EXIT_INTR;
			++vcpu->stat.request_irq_exits;
			goto out;
		}
		if (!need_resched()) {
			++vcpu->stat.light_exits;
			goto again;
		}
	}

out:
	if (r > 0) {
		kvm_resched(vcpu);
		goto preempted;
	}

	post_kvm_run_save(vcpu, kvm_run);

	return r;
}

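/*
 * KVM_RUN: complete any pending pio/mmio from the previous userspace exit,
 * then hand off to __vcpu_run().
 */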
Avi Kivitybccf2152007-02-21 18:04:26 +02001407static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001408{
Avi Kivity6aa8b732006-12-10 02:21:36 -08001409 int r;
Avi Kivity1961d272007-03-05 19:46:05 +02001410 sigset_t sigsaved;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001411
Avi Kivitybccf2152007-02-21 18:04:26 +02001412 vcpu_load(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001413
He, Qingc5ec1532007-09-03 17:07:41 +03001414 if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
1415 kvm_vcpu_block(vcpu);
1416 vcpu_put(vcpu);
1417 return -EAGAIN;
1418 }
1419
Avi Kivity1961d272007-03-05 19:46:05 +02001420 if (vcpu->sigset_active)
1421 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
1422
Dor Laor54810342007-02-12 00:54:39 -08001423 /* re-sync apic's tpr */
He, Qing5cd4f6f2007-08-30 17:04:26 +08001424 if (!irqchip_in_kernel(vcpu->kvm))
1425 set_cr8(vcpu, kvm_run->cr8);
Dor Laor54810342007-02-12 00:54:39 -08001426
Avi Kivity02c83202007-04-29 15:02:17 +03001427 if (vcpu->pio.cur_count) {
1428 r = complete_pio(vcpu);
1429 if (r)
1430 goto out;
1431 }
Zhang Xiantao34c16ee2007-10-20 15:34:38 +08001432#if CONFIG_HAS_IOMEM
Avi Kivity02c83202007-04-29 15:02:17 +03001433 if (vcpu->mmio_needed) {
1434 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
1435 vcpu->mmio_read_completed = 1;
1436 vcpu->mmio_needed = 0;
1437 r = emulate_instruction(vcpu, kvm_run,
Laurent Vivier34273182007-09-18 11:27:37 +02001438 vcpu->mmio_fault_cr2, 0, 1);
Avi Kivity02c83202007-04-29 15:02:17 +03001439 if (r == EMULATE_DO_MMIO) {
1440 /*
1441 * Read-modify-write. Back to userspace.
1442 */
Avi Kivity02c83202007-04-29 15:02:17 +03001443 r = 0;
1444 goto out;
Avi Kivity46fc1472007-02-22 19:39:30 +02001445 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08001446 }
Zhang Xiantao34c16ee2007-10-20 15:34:38 +08001447#endif
Avi Kivity8eb7d332007-03-04 14:17:08 +02001448 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001449 kvm_x86_ops->cache_regs(vcpu);
Avi Kivityb4e63f52007-03-04 13:59:30 +02001450 vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001451 kvm_x86_ops->decache_regs(vcpu);
Avi Kivityb4e63f52007-03-04 13:59:30 +02001452 }
1453
Avi Kivity04d2cc72007-09-10 18:10:54 +03001454 r = __vcpu_run(vcpu, kvm_run);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001455
Avi Kivity039576c2007-03-20 12:46:50 +02001456out:
Avi Kivity1961d272007-03-05 19:46:05 +02001457 if (vcpu->sigset_active)
1458 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1459
Avi Kivity6aa8b732006-12-10 02:21:36 -08001460 vcpu_put(vcpu);
1461 return r;
1462}
1463
Avi Kivitybccf2152007-02-21 18:04:26 +02001464static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu,
1465 struct kvm_regs *regs)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001466{
Avi Kivitybccf2152007-02-21 18:04:26 +02001467 vcpu_load(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001468
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001469 kvm_x86_ops->cache_regs(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001470
1471 regs->rax = vcpu->regs[VCPU_REGS_RAX];
1472 regs->rbx = vcpu->regs[VCPU_REGS_RBX];
1473 regs->rcx = vcpu->regs[VCPU_REGS_RCX];
1474 regs->rdx = vcpu->regs[VCPU_REGS_RDX];
1475 regs->rsi = vcpu->regs[VCPU_REGS_RSI];
1476 regs->rdi = vcpu->regs[VCPU_REGS_RDI];
1477 regs->rsp = vcpu->regs[VCPU_REGS_RSP];
1478 regs->rbp = vcpu->regs[VCPU_REGS_RBP];
Avi Kivity05b3e0c2006-12-13 00:33:45 -08001479#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08001480 regs->r8 = vcpu->regs[VCPU_REGS_R8];
1481 regs->r9 = vcpu->regs[VCPU_REGS_R9];
1482 regs->r10 = vcpu->regs[VCPU_REGS_R10];
1483 regs->r11 = vcpu->regs[VCPU_REGS_R11];
1484 regs->r12 = vcpu->regs[VCPU_REGS_R12];
1485 regs->r13 = vcpu->regs[VCPU_REGS_R13];
1486 regs->r14 = vcpu->regs[VCPU_REGS_R14];
1487 regs->r15 = vcpu->regs[VCPU_REGS_R15];
1488#endif
1489
1490 regs->rip = vcpu->rip;
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001491 regs->rflags = kvm_x86_ops->get_rflags(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001492
1493 /*
1494 * Don't leak debug flags in case they were set for guest debugging
1495 */
1496 if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
1497 regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
1498
1499 vcpu_put(vcpu);
1500
1501 return 0;
1502}
1503
Avi Kivitybccf2152007-02-21 18:04:26 +02001504static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu,
1505 struct kvm_regs *regs)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001506{
Avi Kivitybccf2152007-02-21 18:04:26 +02001507 vcpu_load(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001508
1509 vcpu->regs[VCPU_REGS_RAX] = regs->rax;
1510 vcpu->regs[VCPU_REGS_RBX] = regs->rbx;
1511 vcpu->regs[VCPU_REGS_RCX] = regs->rcx;
1512 vcpu->regs[VCPU_REGS_RDX] = regs->rdx;
1513 vcpu->regs[VCPU_REGS_RSI] = regs->rsi;
1514 vcpu->regs[VCPU_REGS_RDI] = regs->rdi;
1515 vcpu->regs[VCPU_REGS_RSP] = regs->rsp;
1516 vcpu->regs[VCPU_REGS_RBP] = regs->rbp;
Avi Kivity05b3e0c2006-12-13 00:33:45 -08001517#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08001518 vcpu->regs[VCPU_REGS_R8] = regs->r8;
1519 vcpu->regs[VCPU_REGS_R9] = regs->r9;
1520 vcpu->regs[VCPU_REGS_R10] = regs->r10;
1521 vcpu->regs[VCPU_REGS_R11] = regs->r11;
1522 vcpu->regs[VCPU_REGS_R12] = regs->r12;
1523 vcpu->regs[VCPU_REGS_R13] = regs->r13;
1524 vcpu->regs[VCPU_REGS_R14] = regs->r14;
1525 vcpu->regs[VCPU_REGS_R15] = regs->r15;
1526#endif
1527
1528 vcpu->rip = regs->rip;
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001529 kvm_x86_ops->set_rflags(vcpu, regs->rflags);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001530
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001531 kvm_x86_ops->decache_regs(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001532
1533 vcpu_put(vcpu);
1534
1535 return 0;
1536}
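/*
 * Register state crosses these two ioctls as a plain struct kvm_regs
 * snapshot. A minimal userspace sketch (illustrative; vcpu_fd and
 * entry_point are placeholders, error handling omitted):
 *
 *	struct kvm_regs regs;
 *
 *	ioctl(vcpu_fd, KVM_GET_REGS, &regs);
 *	regs.rip = entry_point;
 *	regs.rflags = 2;		bit 1 of RFLAGS is reserved and must be set
 *	ioctl(vcpu_fd, KVM_SET_REGS, &regs);
 */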
1537
1538static void get_segment(struct kvm_vcpu *vcpu,
1539 struct kvm_segment *var, int seg)
1540{
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001541 return kvm_x86_ops->get_segment(vcpu, var, seg);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001542}
1543
Avi Kivitybccf2152007-02-21 18:04:26 +02001544static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1545 struct kvm_sregs *sregs)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001546{
Avi Kivity6aa8b732006-12-10 02:21:36 -08001547 struct descriptor_table dt;
Eddie Dong2a8067f2007-08-06 16:29:07 +03001548 int pending_vec;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001549
Avi Kivitybccf2152007-02-21 18:04:26 +02001550 vcpu_load(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001551
1552 get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
1553 get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
1554 get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
1555 get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
1556 get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
1557 get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
1558
1559 get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
1560 get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
1561
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001562 kvm_x86_ops->get_idt(vcpu, &dt);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001563 sregs->idt.limit = dt.limit;
1564 sregs->idt.base = dt.base;
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001565 kvm_x86_ops->get_gdt(vcpu, &dt);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001566 sregs->gdt.limit = dt.limit;
1567 sregs->gdt.base = dt.base;
1568
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001569 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001570 sregs->cr0 = vcpu->cr0;
1571 sregs->cr2 = vcpu->cr2;
1572 sregs->cr3 = vcpu->cr3;
1573 sregs->cr4 = vcpu->cr4;
Eddie Dong7017fc32007-07-18 11:34:57 +03001574 sregs->cr8 = get_cr8(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001575 sregs->efer = vcpu->shadow_efer;
Eddie Dong7017fc32007-07-18 11:34:57 +03001576 sregs->apic_base = kvm_get_apic_base(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001577
Eddie Dong2a8067f2007-08-06 16:29:07 +03001578 if (irqchip_in_kernel(vcpu->kvm)) {
He, Qingc52fb352007-08-02 14:03:07 +03001579 memset(sregs->interrupt_bitmap, 0,
1580 sizeof sregs->interrupt_bitmap);
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001581 pending_vec = kvm_x86_ops->get_irq(vcpu);
Eddie Dong2a8067f2007-08-06 16:29:07 +03001582 if (pending_vec >= 0)
Mike Dayd77c26f2007-10-08 09:02:08 -04001583 set_bit(pending_vec,
1584 (unsigned long *)sregs->interrupt_bitmap);
Eddie Dong2a8067f2007-08-06 16:29:07 +03001585 } else
He, Qingc52fb352007-08-02 14:03:07 +03001586 memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
1587 sizeof sregs->interrupt_bitmap);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001588
1589 vcpu_put(vcpu);
1590
1591 return 0;
1592}
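/*
 * Note on interrupt_bitmap above (and in the set_sregs path below): with an
 * in-kernel irqchip only the single vector reported by
 * kvm_x86_ops->get_irq() is encoded in the bitmap, while with a userspace
 * irqchip the bitmap is a straight copy of vcpu->irq_pending.
 */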
1593
1594static void set_segment(struct kvm_vcpu *vcpu,
1595 struct kvm_segment *var, int seg)
1596{
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001597 return kvm_x86_ops->set_segment(vcpu, var, seg);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001598}
1599
Avi Kivitybccf2152007-02-21 18:04:26 +02001600static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1601 struct kvm_sregs *sregs)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001602{
Avi Kivity6aa8b732006-12-10 02:21:36 -08001603 int mmu_reset_needed = 0;
Eddie Dong2a8067f2007-08-06 16:29:07 +03001604 int i, pending_vec, max_bits;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001605 struct descriptor_table dt;
1606
Avi Kivitybccf2152007-02-21 18:04:26 +02001607 vcpu_load(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001608
Avi Kivity6aa8b732006-12-10 02:21:36 -08001609 dt.limit = sregs->idt.limit;
1610 dt.base = sregs->idt.base;
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001611 kvm_x86_ops->set_idt(vcpu, &dt);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001612 dt.limit = sregs->gdt.limit;
1613 dt.base = sregs->gdt.base;
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001614 kvm_x86_ops->set_gdt(vcpu, &dt);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001615
1616 vcpu->cr2 = sregs->cr2;
1617 mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
1618 vcpu->cr3 = sregs->cr3;
1619
Eddie Dong7017fc32007-07-18 11:34:57 +03001620 set_cr8(vcpu, sregs->cr8);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001621
1622 mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
Avi Kivity05b3e0c2006-12-13 00:33:45 -08001623#ifdef CONFIG_X86_64
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001624 kvm_x86_ops->set_efer(vcpu, sregs->efer);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001625#endif
Eddie Dong7017fc32007-07-18 11:34:57 +03001626 kvm_set_apic_base(vcpu, sregs->apic_base);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001627
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001628 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
Avi Kivity399badf2007-01-05 16:36:38 -08001629
Avi Kivity6aa8b732006-12-10 02:21:36 -08001630 mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
Rusty Russell81f50e32007-09-06 01:20:38 +10001631 vcpu->cr0 = sregs->cr0;
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001632 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001633
1634 mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001635 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
Avi Kivity1b0973b2007-01-05 16:36:41 -08001636 if (!is_long_mode(vcpu) && is_pae(vcpu))
1637 load_pdptrs(vcpu, vcpu->cr3);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001638
1639 if (mmu_reset_needed)
1640 kvm_mmu_reset_context(vcpu);
1641
He, Qingc52fb352007-08-02 14:03:07 +03001642 if (!irqchip_in_kernel(vcpu->kvm)) {
1643 memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
1644 sizeof vcpu->irq_pending);
1645 vcpu->irq_summary = 0;
1646 for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i)
1647 if (vcpu->irq_pending[i])
1648 __set_bit(i, &vcpu->irq_summary);
Eddie Dong2a8067f2007-08-06 16:29:07 +03001649 } else {
1650 max_bits = (sizeof sregs->interrupt_bitmap) << 3;
1651 pending_vec = find_first_bit(
1652 (const unsigned long *)sregs->interrupt_bitmap,
1653 max_bits);
		/* Only a pending external interrupt is handled here */
1655 if (pending_vec < max_bits) {
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001656 kvm_x86_ops->set_irq(vcpu, pending_vec);
Mike Dayd77c26f2007-10-08 09:02:08 -04001657 pr_debug("Set back pending irq %d\n",
1658 pending_vec);
Eddie Dong2a8067f2007-08-06 16:29:07 +03001659 }
He, Qingc52fb352007-08-02 14:03:07 +03001660 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08001661
Avi Kivity024aa1c2007-03-21 13:44:58 +02001662 set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
1663 set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
1664 set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
1665 set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
1666 set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
1667 set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
1668
1669 set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
1670 set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
1671
Avi Kivity6aa8b732006-12-10 02:21:36 -08001672 vcpu_put(vcpu);
1673
1674 return 0;
1675}
1676
Rusty Russell1747fb72007-09-06 01:21:32 +10001677void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
1678{
1679 struct kvm_segment cs;
1680
1681 get_segment(vcpu, &cs, VCPU_SREG_CS);
1682 *db = cs.db;
1683 *l = cs.l;
1684}
1685EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
1686
Avi Kivity6aa8b732006-12-10 02:21:36 -08001687/*
Avi Kivity6aa8b732006-12-10 02:21:36 -08001688 * Translate a guest virtual address to a guest physical address.
1689 */
Avi Kivitybccf2152007-02-21 18:04:26 +02001690static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1691 struct kvm_translation *tr)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001692{
1693 unsigned long vaddr = tr->linear_address;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001694 gpa_t gpa;
1695
Avi Kivitybccf2152007-02-21 18:04:26 +02001696 vcpu_load(vcpu);
Shaohua Li11ec2802007-07-23 14:51:37 +08001697 mutex_lock(&vcpu->kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001698 gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr);
1699 tr->physical_address = gpa;
1700 tr->valid = gpa != UNMAPPED_GVA;
1701 tr->writeable = 1;
1702 tr->usermode = 0;
Shaohua Li11ec2802007-07-23 14:51:37 +08001703 mutex_unlock(&vcpu->kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001704 vcpu_put(vcpu);
1705
1706 return 0;
1707}
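/*
 * A minimal userspace sketch of the translation ioctl (illustrative; vcpu_fd
 * and gva are placeholders):
 *
 *	struct kvm_translation tr = { .linear_address = gva };
 *
 *	ioctl(vcpu_fd, KVM_TRANSLATE, &tr);
 *	if (tr.valid)
 *		... use tr.physical_address ...
 */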
1708
Avi Kivitybccf2152007-02-21 18:04:26 +02001709static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
1710 struct kvm_interrupt *irq)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001711{
Avi Kivity6aa8b732006-12-10 02:21:36 -08001712 if (irq->irq < 0 || irq->irq >= 256)
1713 return -EINVAL;
Eddie Dong97222cc2007-09-12 10:58:04 +03001714 if (irqchip_in_kernel(vcpu->kvm))
1715 return -ENXIO;
Avi Kivitybccf2152007-02-21 18:04:26 +02001716 vcpu_load(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001717
1718 set_bit(irq->irq, vcpu->irq_pending);
1719 set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);
1720
1721 vcpu_put(vcpu);
1722
1723 return 0;
1724}
1725
Avi Kivitybccf2152007-02-21 18:04:26 +02001726static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
1727 struct kvm_debug_guest *dbg)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001728{
Avi Kivity6aa8b732006-12-10 02:21:36 -08001729 int r;
1730
Avi Kivitybccf2152007-02-21 18:04:26 +02001731 vcpu_load(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001732
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001733 r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001734
1735 vcpu_put(vcpu);
1736
1737 return r;
1738}
1739
Avi Kivity9a2bb7f2007-02-22 12:58:31 +02001740static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma,
1741 unsigned long address,
1742 int *type)
1743{
1744 struct kvm_vcpu *vcpu = vma->vm_file->private_data;
1745 unsigned long pgoff;
1746 struct page *page;
1747
Avi Kivity9a2bb7f2007-02-22 12:58:31 +02001748 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
Avi Kivity039576c2007-03-20 12:46:50 +02001749 if (pgoff == 0)
1750 page = virt_to_page(vcpu->run);
1751 else if (pgoff == KVM_PIO_PAGE_OFFSET)
1752 page = virt_to_page(vcpu->pio_data);
1753 else
Avi Kivity9a2bb7f2007-02-22 12:58:31 +02001754 return NOPAGE_SIGBUS;
Avi Kivity9a2bb7f2007-02-22 12:58:31 +02001755 get_page(page);
Nguyen Anh Quynhcd0d9132007-07-11 14:30:54 +03001756 if (type != NULL)
1757 *type = VM_FAULT_MINOR;
1758
Avi Kivity9a2bb7f2007-02-22 12:58:31 +02001759 return page;
1760}
1761
1762static struct vm_operations_struct kvm_vcpu_vm_ops = {
1763 .nopage = kvm_vcpu_nopage,
1764};
1765
1766static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
1767{
1768 vma->vm_ops = &kvm_vcpu_vm_ops;
1769 return 0;
1770}
1771
Avi Kivitybccf2152007-02-21 18:04:26 +02001772static int kvm_vcpu_release(struct inode *inode, struct file *filp)
1773{
1774 struct kvm_vcpu *vcpu = filp->private_data;
1775
1776 fput(vcpu->kvm->filp);
1777 return 0;
1778}
1779
1780static struct file_operations kvm_vcpu_fops = {
1781 .release = kvm_vcpu_release,
1782 .unlocked_ioctl = kvm_vcpu_ioctl,
1783 .compat_ioctl = kvm_vcpu_ioctl,
Avi Kivity9a2bb7f2007-02-22 12:58:31 +02001784 .mmap = kvm_vcpu_mmap,
Avi Kivitybccf2152007-02-21 18:04:26 +02001785};
1786
1787/*
1788 * Allocates an inode for the vcpu.
1789 */
1790static int create_vcpu_fd(struct kvm_vcpu *vcpu)
1791{
1792 int fd, r;
1793 struct inode *inode;
1794 struct file *file;
1795
Avi Kivityd6d28162007-06-28 08:38:16 -04001796 r = anon_inode_getfd(&fd, &inode, &file,
1797 "kvm-vcpu", &kvm_vcpu_fops, vcpu);
1798 if (r)
1799 return r;
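	/*
	 * The vcpu fd keeps the VM's file pinned; the reference taken here
	 * is dropped by the fput() in kvm_vcpu_release().
	 */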
Avi Kivitybccf2152007-02-21 18:04:26 +02001800 atomic_inc(&vcpu->kvm->filp->f_count);
Avi Kivitybccf2152007-02-21 18:04:26 +02001801 return fd;
Avi Kivitybccf2152007-02-21 18:04:26 +02001802}
1803
Avi Kivityc5ea7662007-02-20 18:41:05 +02001804/*
1805 * Creates some virtual cpus. Good luck creating more than one.
1806 */
1807static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
1808{
1809 int r;
1810 struct kvm_vcpu *vcpu;
1811
Avi Kivityc5ea7662007-02-20 18:41:05 +02001812 if (!valid_vcpu(n))
Rusty Russellfb3f0f52007-07-27 17:16:56 +10001813 return -EINVAL;
Avi Kivityc5ea7662007-02-20 18:41:05 +02001814
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001815 vcpu = kvm_x86_ops->vcpu_create(kvm, n);
Rusty Russellfb3f0f52007-07-27 17:16:56 +10001816 if (IS_ERR(vcpu))
1817 return PTR_ERR(vcpu);
Avi Kivityc5ea7662007-02-20 18:41:05 +02001818
Avi Kivity15ad7142007-07-11 18:17:21 +03001819 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
1820
Rusty Russellb114b082007-07-30 21:13:43 +10001821 /* We do fxsave: this must be aligned. */
1822 BUG_ON((unsigned long)&vcpu->host_fx_image & 0xF);
1823
Rusty Russellfb3f0f52007-07-27 17:16:56 +10001824 vcpu_load(vcpu);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02001825 r = kvm_x86_ops->vcpu_reset(vcpu);
1826 if (r == 0)
1827 r = kvm_mmu_setup(vcpu);
Avi Kivityc5ea7662007-02-20 18:41:05 +02001828 vcpu_put(vcpu);
Avi Kivityc5ea7662007-02-20 18:41:05 +02001829 if (r < 0)
Rusty Russellfb3f0f52007-07-27 17:16:56 +10001830 goto free_vcpu;
Avi Kivityc5ea7662007-02-20 18:41:05 +02001831
Shaohua Li11ec2802007-07-23 14:51:37 +08001832 mutex_lock(&kvm->lock);
Rusty Russellfb3f0f52007-07-27 17:16:56 +10001833 if (kvm->vcpus[n]) {
1834 r = -EEXIST;
Shaohua Li11ec2802007-07-23 14:51:37 +08001835 mutex_unlock(&kvm->lock);
Rusty Russellfb3f0f52007-07-27 17:16:56 +10001836 goto mmu_unload;
1837 }
1838 kvm->vcpus[n] = vcpu;
Shaohua Li11ec2802007-07-23 14:51:37 +08001839 mutex_unlock(&kvm->lock);
Rusty Russellfb3f0f52007-07-27 17:16:56 +10001840
1841 /* Now it's all set up, let userspace reach it */
Avi Kivitybccf2152007-02-21 18:04:26 +02001842 r = create_vcpu_fd(vcpu);
1843 if (r < 0)
Rusty Russellfb3f0f52007-07-27 17:16:56 +10001844 goto unlink;
Avi Kivitybccf2152007-02-21 18:04:26 +02001845 return r;
Avi Kivityc5ea7662007-02-20 18:41:05 +02001846
Rusty Russellfb3f0f52007-07-27 17:16:56 +10001847unlink:
Shaohua Li11ec2802007-07-23 14:51:37 +08001848 mutex_lock(&kvm->lock);
Rusty Russellfb3f0f52007-07-27 17:16:56 +10001849 kvm->vcpus[n] = NULL;
Shaohua Li11ec2802007-07-23 14:51:37 +08001850 mutex_unlock(&kvm->lock);
Rusty Russellfb3f0f52007-07-27 17:16:56 +10001851
1852mmu_unload:
1853 vcpu_load(vcpu);
1854 kvm_mmu_unload(vcpu);
1855 vcpu_put(vcpu);
1856
1857free_vcpu:
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03001858 kvm_x86_ops->vcpu_free(vcpu);
Avi Kivityc5ea7662007-02-20 18:41:05 +02001859 return r;
1860}
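/*
 * A minimal userspace sketch of creating a vcpu and mapping its shared
 * kvm_run area (illustrative; kvm_fd is the /dev/kvm fd, vm_fd the fd
 * returned by KVM_CREATE_VM, error handling omitted):
 *
 *	int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
 *	long mmap_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
 *	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 */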
1861
Avi Kivity1961d272007-03-05 19:46:05 +02001862static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
1863{
1864 if (sigset) {
1865 sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP));
1866 vcpu->sigset_active = 1;
1867 vcpu->sigset = *sigset;
1868 } else
1869 vcpu->sigset_active = 0;
1870 return 0;
1871}
1872
Avi Kivityb8836732007-04-01 16:34:31 +03001873/*
 * fxsave fpu state, taken from x86_64/processor.h; to be removed once a
 * unified asm/x86/processor.h is available.
1876 */
1877struct fxsave {
1878 u16 cwd;
1879 u16 swd;
1880 u16 twd;
1881 u16 fop;
1882 u64 rip;
1883 u64 rdp;
1884 u32 mxcsr;
1885 u32 mxcsr_mask;
1886 u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
1887#ifdef CONFIG_X86_64
1888 u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
1889#else
1890 u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
1891#endif
1892};
1893
1894static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1895{
Rusty Russellb114b082007-07-30 21:13:43 +10001896 struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
Avi Kivityb8836732007-04-01 16:34:31 +03001897
1898 vcpu_load(vcpu);
1899
1900 memcpy(fpu->fpr, fxsave->st_space, 128);
1901 fpu->fcw = fxsave->cwd;
1902 fpu->fsw = fxsave->swd;
1903 fpu->ftwx = fxsave->twd;
1904 fpu->last_opcode = fxsave->fop;
1905 fpu->last_ip = fxsave->rip;
1906 fpu->last_dp = fxsave->rdp;
1907 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
1908
1909 vcpu_put(vcpu);
1910
1911 return 0;
1912}
1913
1914static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1915{
Rusty Russellb114b082007-07-30 21:13:43 +10001916 struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
Avi Kivityb8836732007-04-01 16:34:31 +03001917
1918 vcpu_load(vcpu);
1919
1920 memcpy(fxsave->st_space, fpu->fpr, 128);
1921 fxsave->cwd = fpu->fcw;
1922 fxsave->swd = fpu->fsw;
1923 fxsave->twd = fpu->ftwx;
1924 fxsave->fop = fpu->last_opcode;
1925 fxsave->rip = fpu->last_ip;
1926 fxsave->rdp = fpu->last_dp;
1927 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
1928
1929 vcpu_put(vcpu);
1930
1931 return 0;
1932}
1933
Avi Kivitybccf2152007-02-21 18:04:26 +02001934static long kvm_vcpu_ioctl(struct file *filp,
1935 unsigned int ioctl, unsigned long arg)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001936{
Avi Kivitybccf2152007-02-21 18:04:26 +02001937 struct kvm_vcpu *vcpu = filp->private_data;
Al Viro2f366982007-02-09 16:38:35 +00001938 void __user *argp = (void __user *)arg;
Carsten Otte313a3dc2007-10-11 19:16:52 +02001939 int r;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001940
1941 switch (ioctl) {
Avi Kivity9a2bb7f2007-02-22 12:58:31 +02001942 case KVM_RUN:
Avi Kivityf0fe5102007-03-07 13:11:17 +02001943 r = -EINVAL;
1944 if (arg)
1945 goto out;
Avi Kivity9a2bb7f2007-02-22 12:58:31 +02001946 r = kvm_vcpu_ioctl_run(vcpu, vcpu->run);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001947 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001948 case KVM_GET_REGS: {
1949 struct kvm_regs kvm_regs;
1950
Avi Kivitybccf2152007-02-21 18:04:26 +02001951 memset(&kvm_regs, 0, sizeof kvm_regs);
1952 r = kvm_vcpu_ioctl_get_regs(vcpu, &kvm_regs);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001953 if (r)
1954 goto out;
1955 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00001956 if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs))
Avi Kivity6aa8b732006-12-10 02:21:36 -08001957 goto out;
1958 r = 0;
1959 break;
1960 }
1961 case KVM_SET_REGS: {
1962 struct kvm_regs kvm_regs;
1963
1964 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00001965 if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs))
Avi Kivity6aa8b732006-12-10 02:21:36 -08001966 goto out;
Avi Kivitybccf2152007-02-21 18:04:26 +02001967 r = kvm_vcpu_ioctl_set_regs(vcpu, &kvm_regs);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001968 if (r)
1969 goto out;
1970 r = 0;
1971 break;
1972 }
1973 case KVM_GET_SREGS: {
1974 struct kvm_sregs kvm_sregs;
1975
Avi Kivitybccf2152007-02-21 18:04:26 +02001976 memset(&kvm_sregs, 0, sizeof kvm_sregs);
1977 r = kvm_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001978 if (r)
1979 goto out;
1980 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00001981 if (copy_to_user(argp, &kvm_sregs, sizeof kvm_sregs))
Avi Kivity6aa8b732006-12-10 02:21:36 -08001982 goto out;
1983 r = 0;
1984 break;
1985 }
1986 case KVM_SET_SREGS: {
1987 struct kvm_sregs kvm_sregs;
1988
1989 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00001990 if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs))
Avi Kivity6aa8b732006-12-10 02:21:36 -08001991 goto out;
Avi Kivitybccf2152007-02-21 18:04:26 +02001992 r = kvm_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001993 if (r)
1994 goto out;
1995 r = 0;
1996 break;
1997 }
1998 case KVM_TRANSLATE: {
1999 struct kvm_translation tr;
2000
2001 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00002002 if (copy_from_user(&tr, argp, sizeof tr))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002003 goto out;
Avi Kivitybccf2152007-02-21 18:04:26 +02002004 r = kvm_vcpu_ioctl_translate(vcpu, &tr);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002005 if (r)
2006 goto out;
2007 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00002008 if (copy_to_user(argp, &tr, sizeof tr))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002009 goto out;
2010 r = 0;
2011 break;
2012 }
2013 case KVM_INTERRUPT: {
2014 struct kvm_interrupt irq;
2015
2016 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00002017 if (copy_from_user(&irq, argp, sizeof irq))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002018 goto out;
Avi Kivitybccf2152007-02-21 18:04:26 +02002019 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002020 if (r)
2021 goto out;
2022 r = 0;
2023 break;
2024 }
2025 case KVM_DEBUG_GUEST: {
2026 struct kvm_debug_guest dbg;
2027
2028 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00002029 if (copy_from_user(&dbg, argp, sizeof dbg))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002030 goto out;
Avi Kivitybccf2152007-02-21 18:04:26 +02002031 r = kvm_vcpu_ioctl_debug_guest(vcpu, &dbg);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002032 if (r)
2033 goto out;
2034 r = 0;
2035 break;
2036 }
Avi Kivity1961d272007-03-05 19:46:05 +02002037 case KVM_SET_SIGNAL_MASK: {
2038 struct kvm_signal_mask __user *sigmask_arg = argp;
2039 struct kvm_signal_mask kvm_sigmask;
2040 sigset_t sigset, *p;
2041
2042 p = NULL;
2043 if (argp) {
2044 r = -EFAULT;
2045 if (copy_from_user(&kvm_sigmask, argp,
2046 sizeof kvm_sigmask))
2047 goto out;
2048 r = -EINVAL;
2049 if (kvm_sigmask.len != sizeof sigset)
2050 goto out;
2051 r = -EFAULT;
2052 if (copy_from_user(&sigset, sigmask_arg->sigset,
2053 sizeof sigset))
2054 goto out;
2055 p = &sigset;
2056 }
		r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);
2058 break;
2059 }
Avi Kivityb8836732007-04-01 16:34:31 +03002060 case KVM_GET_FPU: {
2061 struct kvm_fpu fpu;
2062
2063 memset(&fpu, 0, sizeof fpu);
2064 r = kvm_vcpu_ioctl_get_fpu(vcpu, &fpu);
2065 if (r)
2066 goto out;
2067 r = -EFAULT;
2068 if (copy_to_user(argp, &fpu, sizeof fpu))
2069 goto out;
2070 r = 0;
2071 break;
2072 }
2073 case KVM_SET_FPU: {
2074 struct kvm_fpu fpu;
2075
2076 r = -EFAULT;
2077 if (copy_from_user(&fpu, argp, sizeof fpu))
2078 goto out;
2079 r = kvm_vcpu_ioctl_set_fpu(vcpu, &fpu);
2080 if (r)
2081 goto out;
2082 r = 0;
2083 break;
2084 }
Avi Kivitybccf2152007-02-21 18:04:26 +02002085 default:
Carsten Otte313a3dc2007-10-11 19:16:52 +02002086 r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
Avi Kivitybccf2152007-02-21 18:04:26 +02002087 }
2088out:
2089 return r;
2090}
2091
2092static long kvm_vm_ioctl(struct file *filp,
2093 unsigned int ioctl, unsigned long arg)
2094{
2095 struct kvm *kvm = filp->private_data;
2096 void __user *argp = (void __user *)arg;
Carsten Otte1fe779f2007-10-29 16:08:35 +01002097 int r;
Avi Kivitybccf2152007-02-21 18:04:26 +02002098
2099 switch (ioctl) {
2100 case KVM_CREATE_VCPU:
2101 r = kvm_vm_ioctl_create_vcpu(kvm, arg);
2102 if (r < 0)
2103 goto out;
2104 break;
Izik Eidus6fc138d2007-10-09 19:20:39 +02002105 case KVM_SET_USER_MEMORY_REGION: {
2106 struct kvm_userspace_memory_region kvm_userspace_mem;
2107
2108 r = -EFAULT;
2109 if (copy_from_user(&kvm_userspace_mem, argp,
2110 sizeof kvm_userspace_mem))
2111 goto out;
2112
2113 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002114 if (r)
2115 goto out;
2116 break;
2117 }
2118 case KVM_GET_DIRTY_LOG: {
2119 struct kvm_dirty_log log;
2120
2121 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00002122 if (copy_from_user(&log, argp, sizeof log))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002123 goto out;
Avi Kivity2c6f5df2007-02-20 18:27:58 +02002124 r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002125 if (r)
2126 goto out;
2127 break;
2128 }
Avi Kivityf17abe92007-02-21 19:28:04 +02002129 default:
Carsten Otte1fe779f2007-10-29 16:08:35 +01002130 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
Avi Kivityf17abe92007-02-21 19:28:04 +02002131 }
2132out:
2133 return r;
2134}
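/*
 * A minimal userspace sketch of KVM_SET_USER_MEMORY_REGION (illustrative;
 * vm_fd, size, the slot number and the guest-physical base are placeholders,
 * error handling omitted):
 *
 *	void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	struct kvm_userspace_memory_region region = {
 *		.slot            = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size     = size,
 *		.userspace_addr  = (unsigned long)mem,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */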
2135
2136static struct page *kvm_vm_nopage(struct vm_area_struct *vma,
2137 unsigned long address,
2138 int *type)
2139{
2140 struct kvm *kvm = vma->vm_file->private_data;
2141 unsigned long pgoff;
Avi Kivityf17abe92007-02-21 19:28:04 +02002142 struct page *page;
2143
Avi Kivityf17abe92007-02-21 19:28:04 +02002144 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
Izik Eiduse0d62c72007-10-24 23:57:46 +02002145 if (!kvm_is_visible_gfn(kvm, pgoff))
2146 return NOPAGE_SIGBUS;
Anthony Liguoriaab61cc2007-10-29 15:15:20 -05002147 /* current->mm->mmap_sem is already held so call lockless version */
2148 page = __gfn_to_page(kvm, pgoff);
Izik Eidus8a7ae052007-10-18 11:09:33 +02002149 if (is_error_page(page)) {
2150 kvm_release_page(page);
Avi Kivityf17abe92007-02-21 19:28:04 +02002151 return NOPAGE_SIGBUS;
Izik Eidus8a7ae052007-10-18 11:09:33 +02002152 }
Nguyen Anh Quynhcd0d9132007-07-11 14:30:54 +03002153 if (type != NULL)
2154 *type = VM_FAULT_MINOR;
2155
Avi Kivityf17abe92007-02-21 19:28:04 +02002156 return page;
2157}
2158
2159static struct vm_operations_struct kvm_vm_vm_ops = {
2160 .nopage = kvm_vm_nopage,
2161};
2162
2163static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
2164{
2165 vma->vm_ops = &kvm_vm_vm_ops;
2166 return 0;
2167}
2168
2169static struct file_operations kvm_vm_fops = {
2170 .release = kvm_vm_release,
2171 .unlocked_ioctl = kvm_vm_ioctl,
2172 .compat_ioctl = kvm_vm_ioctl,
2173 .mmap = kvm_vm_mmap,
2174};
2175
2176static int kvm_dev_ioctl_create_vm(void)
2177{
2178 int fd, r;
2179 struct inode *inode;
2180 struct file *file;
2181 struct kvm *kvm;
2182
Avi Kivityf17abe92007-02-21 19:28:04 +02002183 kvm = kvm_create_vm();
Avi Kivityd6d28162007-06-28 08:38:16 -04002184 if (IS_ERR(kvm))
2185 return PTR_ERR(kvm);
2186 r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm);
2187 if (r) {
2188 kvm_destroy_vm(kvm);
2189 return r;
Avi Kivityf17abe92007-02-21 19:28:04 +02002190 }
2191
Avi Kivitybccf2152007-02-21 18:04:26 +02002192 kvm->filp = file;
Avi Kivityf17abe92007-02-21 19:28:04 +02002193
Avi Kivityf17abe92007-02-21 19:28:04 +02002194 return fd;
Avi Kivityf17abe92007-02-21 19:28:04 +02002195}
2196
2197static long kvm_dev_ioctl(struct file *filp,
2198 unsigned int ioctl, unsigned long arg)
2199{
2200 void __user *argp = (void __user *)arg;
Avi Kivity07c45a32007-03-07 13:05:38 +02002201 long r = -EINVAL;
Avi Kivityf17abe92007-02-21 19:28:04 +02002202
2203 switch (ioctl) {
2204 case KVM_GET_API_VERSION:
Avi Kivityf0fe5102007-03-07 13:11:17 +02002205 r = -EINVAL;
2206 if (arg)
2207 goto out;
Avi Kivityf17abe92007-02-21 19:28:04 +02002208 r = KVM_API_VERSION;
2209 break;
2210 case KVM_CREATE_VM:
Avi Kivityf0fe5102007-03-07 13:11:17 +02002211 r = -EINVAL;
2212 if (arg)
2213 goto out;
Avi Kivityf17abe92007-02-21 19:28:04 +02002214 r = kvm_dev_ioctl_create_vm();
2215 break;
Eddie Dong85f455f2007-07-06 12:20:49 +03002216 case KVM_CHECK_EXTENSION: {
2217 int ext = (long)argp;
2218
2219 switch (ext) {
2220 case KVM_CAP_IRQCHIP:
Eddie Dongb6958ce2007-07-18 12:15:21 +03002221 case KVM_CAP_HLT:
Izik Eidus82ce2c92007-10-02 18:52:55 +02002222 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
Izik Eidus6fc138d2007-10-09 19:20:39 +02002223 case KVM_CAP_USER_MEMORY:
Izik Eiduscbc94022007-10-25 00:29:55 +02002224 case KVM_CAP_SET_TSS_ADDR:
Eddie Dong85f455f2007-07-06 12:20:49 +03002225 r = 1;
2226 break;
2227 default:
2228 r = 0;
2229 break;
2230 }
Avi Kivity5d308f42007-03-01 17:56:20 +02002231 break;
Eddie Dong85f455f2007-07-06 12:20:49 +03002232 }
Avi Kivity07c45a32007-03-07 13:05:38 +02002233 case KVM_GET_VCPU_MMAP_SIZE:
2234 r = -EINVAL;
2235 if (arg)
2236 goto out;
Avi Kivity039576c2007-03-20 12:46:50 +02002237 r = 2 * PAGE_SIZE;
Avi Kivity07c45a32007-03-07 13:05:38 +02002238 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002239 default:
Carsten Otte043405e2007-10-10 17:16:19 +02002240 return kvm_arch_dev_ioctl(filp, ioctl, arg);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002241 }
2242out:
2243 return r;
2244}
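/*
 * A minimal userspace sketch of the /dev/kvm handshake served above
 * (illustrative, error handling omitted):
 *
 *	int kvm_fd = open("/dev/kvm", O_RDWR);
 *
 *	if (ioctl(kvm_fd, KVM_GET_API_VERSION, 0) != KVM_API_VERSION)
 *		... refuse to run against a mismatched ABI ...
 *	if (!ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY))
 *		... kernel too old for userspace-managed memory slots ...
 *
 *	int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
 */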
2245
Avi Kivity6aa8b732006-12-10 02:21:36 -08002246static struct file_operations kvm_chardev_ops = {
Avi Kivity6aa8b732006-12-10 02:21:36 -08002247 .unlocked_ioctl = kvm_dev_ioctl,
2248 .compat_ioctl = kvm_dev_ioctl,
Avi Kivity6aa8b732006-12-10 02:21:36 -08002249};
2250
2251static struct miscdevice kvm_dev = {
Avi Kivitybbe44322007-03-04 13:27:36 +02002252 KVM_MINOR,
Avi Kivity6aa8b732006-12-10 02:21:36 -08002253 "kvm",
2254 &kvm_chardev_ops,
2255};
2256
Avi Kivity774c47f2007-02-12 00:54:47 -08002257/*
2258 * Make sure that a cpu that is being hot-unplugged does not have any vcpus
2259 * cached on it.
2260 */
2261static void decache_vcpus_on_cpu(int cpu)
2262{
2263 struct kvm *vm;
2264 struct kvm_vcpu *vcpu;
2265 int i;
2266
2267 spin_lock(&kvm_lock);
Shaohua Li11ec2802007-07-23 14:51:37 +08002268 list_for_each_entry(vm, &vm_list, vm_list)
Avi Kivity774c47f2007-02-12 00:54:47 -08002269 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
Rusty Russellfb3f0f52007-07-27 17:16:56 +10002270 vcpu = vm->vcpus[i];
2271 if (!vcpu)
2272 continue;
Avi Kivity774c47f2007-02-12 00:54:47 -08002273 /*
2274 * If the vcpu is locked, then it is running on some
2275 * other cpu and therefore it is not cached on the
2276 * cpu in question.
2277 *
2278 * If it's not locked, check the last cpu it executed
2279 * on.
2280 */
2281 if (mutex_trylock(&vcpu->mutex)) {
2282 if (vcpu->cpu == cpu) {
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03002283 kvm_x86_ops->vcpu_decache(vcpu);
Avi Kivity774c47f2007-02-12 00:54:47 -08002284 vcpu->cpu = -1;
2285 }
2286 mutex_unlock(&vcpu->mutex);
2287 }
2288 }
2289 spin_unlock(&kvm_lock);
2290}
2291
Avi Kivity1b6c0162007-05-24 13:03:52 +03002292static void hardware_enable(void *junk)
2293{
2294 int cpu = raw_smp_processor_id();
2295
2296 if (cpu_isset(cpu, cpus_hardware_enabled))
2297 return;
2298 cpu_set(cpu, cpus_hardware_enabled);
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03002299 kvm_x86_ops->hardware_enable(NULL);
Avi Kivity1b6c0162007-05-24 13:03:52 +03002300}
2301
2302static void hardware_disable(void *junk)
2303{
2304 int cpu = raw_smp_processor_id();
2305
2306 if (!cpu_isset(cpu, cpus_hardware_enabled))
2307 return;
2308 cpu_clear(cpu, cpus_hardware_enabled);
2309 decache_vcpus_on_cpu(cpu);
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03002310 kvm_x86_ops->hardware_disable(NULL);
Avi Kivity1b6c0162007-05-24 13:03:52 +03002311}
2312
Avi Kivity774c47f2007-02-12 00:54:47 -08002313static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
2314 void *v)
2315{
2316 int cpu = (long)v;
2317
2318 switch (val) {
Avi Kivitycec9ad22007-05-24 13:11:41 +03002319 case CPU_DYING:
2320 case CPU_DYING_FROZEN:
Avi Kivity6ec8a852007-08-19 15:57:26 +03002321 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
2322 cpu);
2323 hardware_disable(NULL);
2324 break;
Avi Kivity774c47f2007-02-12 00:54:47 -08002325 case CPU_UP_CANCELED:
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07002326 case CPU_UP_CANCELED_FROZEN:
Jeremy Katz43934a32007-02-19 14:37:46 +02002327 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
2328 cpu);
Avi Kivity1b6c0162007-05-24 13:03:52 +03002329 smp_call_function_single(cpu, hardware_disable, NULL, 0, 1);
Avi Kivity774c47f2007-02-12 00:54:47 -08002330 break;
Jeremy Katz43934a32007-02-19 14:37:46 +02002331 case CPU_ONLINE:
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07002332 case CPU_ONLINE_FROZEN:
Jeremy Katz43934a32007-02-19 14:37:46 +02002333 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
2334 cpu);
Avi Kivity1b6c0162007-05-24 13:03:52 +03002335 smp_call_function_single(cpu, hardware_enable, NULL, 0, 1);
Avi Kivity774c47f2007-02-12 00:54:47 -08002336 break;
2337 }
2338 return NOTIFY_OK;
2339}
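/*
 * CPU_DYING is delivered on the CPU that is going away, so hardware_disable()
 * can be called directly; the UP_CANCELED and ONLINE notifications may arrive
 * on another CPU, hence the smp_call_function_single() to run the enable or
 * disable on the CPU in question.
 */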
2340
Rusty Russell9a2b85c2007-07-17 23:17:55 +10002341static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
Mike Dayd77c26f2007-10-08 09:02:08 -04002342 void *v)
Rusty Russell9a2b85c2007-07-17 23:17:55 +10002343{
2344 if (val == SYS_RESTART) {
2345 /*
2346 * Some (well, at least mine) BIOSes hang on reboot if
2347 * in vmx root mode.
2348 */
2349 printk(KERN_INFO "kvm: exiting hardware virtualization\n");
2350 on_each_cpu(hardware_disable, NULL, 0, 1);
2351 }
2352 return NOTIFY_OK;
2353}
2354
2355static struct notifier_block kvm_reboot_notifier = {
2356 .notifier_call = kvm_reboot,
2357 .priority = 0,
2358};
2359
Gregory Haskins2eeb2e92007-05-31 14:08:53 -04002360void kvm_io_bus_init(struct kvm_io_bus *bus)
2361{
2362 memset(bus, 0, sizeof(*bus));
2363}
2364
2365void kvm_io_bus_destroy(struct kvm_io_bus *bus)
2366{
2367 int i;
2368
2369 for (i = 0; i < bus->dev_count; i++) {
2370 struct kvm_io_device *pos = bus->devs[i];
2371
2372 kvm_iodevice_destructor(pos);
2373 }
2374}
2375
2376struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr)
2377{
2378 int i;
2379
2380 for (i = 0; i < bus->dev_count; i++) {
2381 struct kvm_io_device *pos = bus->devs[i];
2382
2383 if (pos->in_range(pos, addr))
2384 return pos;
2385 }
2386
2387 return NULL;
2388}
2389
2390void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
2391{
2392 BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1));
2393
2394 bus->devs[bus->dev_count++] = dev;
2395}
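/*
 * In-kernel emulated devices hang off these buses: each device registers
 * itself with kvm_io_bus_register_dev(), and an MMIO or PIO access is routed
 * to the first device whose in_range() callback claims the address.
 */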
2396
Avi Kivity774c47f2007-02-12 00:54:47 -08002397static struct notifier_block kvm_cpu_notifier = {
2398 .notifier_call = kvm_cpu_hotplug,
2399 .priority = 20, /* must be > scheduler priority */
2400};
2401
Avi Kivity1165f5f2007-04-19 17:27:43 +03002402static u64 stat_get(void *_offset)
2403{
2404 unsigned offset = (long)_offset;
2405 u64 total = 0;
2406 struct kvm *kvm;
2407 struct kvm_vcpu *vcpu;
2408 int i;
2409
2410 spin_lock(&kvm_lock);
2411 list_for_each_entry(kvm, &vm_list, vm_list)
2412 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
Rusty Russellfb3f0f52007-07-27 17:16:56 +10002413 vcpu = kvm->vcpus[i];
2414 if (vcpu)
2415 total += *(u32 *)((void *)vcpu + offset);
Avi Kivity1165f5f2007-04-19 17:27:43 +03002416 }
2417 spin_unlock(&kvm_lock);
2418 return total;
2419}
2420
Rusty Russell3dea7ca2007-08-01 10:12:22 +10002421DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, NULL, "%llu\n");
Avi Kivity1165f5f2007-04-19 17:27:43 +03002422
Avi Kivity6aa8b732006-12-10 02:21:36 -08002423static __init void kvm_init_debug(void)
2424{
2425 struct kvm_stats_debugfs_item *p;
2426
Al Viro8b6d44c2007-02-09 16:38:40 +00002427 debugfs_dir = debugfs_create_dir("kvm", NULL);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002428 for (p = debugfs_entries; p->name; ++p)
Avi Kivity1165f5f2007-04-19 17:27:43 +03002429 p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir,
2430 (void *)(long)p->offset,
2431 &stat_fops);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002432}
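/*
 * Each debugfs_entries item becomes a read-only file under the "kvm"
 * debugfs directory; reading it invokes stat_get(), which sums the per-vcpu
 * counter at the recorded offset across every vcpu of every VM on vm_list.
 */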
2433
2434static void kvm_exit_debug(void)
2435{
2436 struct kvm_stats_debugfs_item *p;
2437
2438 for (p = debugfs_entries; p->name; ++p)
2439 debugfs_remove(p->dentry);
2440 debugfs_remove(debugfs_dir);
2441}
2442
Avi Kivity59ae6c62007-02-12 00:54:48 -08002443static int kvm_suspend(struct sys_device *dev, pm_message_t state)
2444{
Avi Kivity4267c412007-05-24 13:09:41 +03002445 hardware_disable(NULL);
Avi Kivity59ae6c62007-02-12 00:54:48 -08002446 return 0;
2447}
2448
2449static int kvm_resume(struct sys_device *dev)
2450{
Avi Kivity4267c412007-05-24 13:09:41 +03002451 hardware_enable(NULL);
Avi Kivity59ae6c62007-02-12 00:54:48 -08002452 return 0;
2453}
2454
2455static struct sysdev_class kvm_sysdev_class = {
Kay Sieversaf5ca3f2007-12-20 02:09:39 +01002456 .name = "kvm",
Avi Kivity59ae6c62007-02-12 00:54:48 -08002457 .suspend = kvm_suspend,
2458 .resume = kvm_resume,
2459};
2460
2461static struct sys_device kvm_sysdev = {
2462 .id = 0,
2463 .cls = &kvm_sysdev_class,
2464};
2465
Izik Eiduscea7bb22007-10-17 19:17:48 +02002466struct page *bad_page;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002467
Avi Kivity15ad7142007-07-11 18:17:21 +03002468static inline
2469struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
2470{
2471 return container_of(pn, struct kvm_vcpu, preempt_notifier);
2472}
2473
2474static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
2475{
2476 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
2477
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03002478 kvm_x86_ops->vcpu_load(vcpu, cpu);
Avi Kivity15ad7142007-07-11 18:17:21 +03002479}
2480
2481static void kvm_sched_out(struct preempt_notifier *pn,
2482 struct task_struct *next)
2483{
2484 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
2485
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03002486 kvm_x86_ops->vcpu_put(vcpu);
Avi Kivity15ad7142007-07-11 18:17:21 +03002487}
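/*
 * These preempt notifiers let the scheduler call back into kvm when a vcpu
 * task is preempted or resumed, so vendor-specific guest state is saved and
 * reloaded without the task having to bracket every preemption point with
 * vcpu_load()/vcpu_put() itself.
 */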
2488
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03002489int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size,
Rusty Russellc16f8622007-07-30 21:12:19 +10002490 struct module *module)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002491{
2492 int r;
Yang, Sheng002c7f72007-07-31 14:23:01 +03002493 int cpu;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002494
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03002495 if (kvm_x86_ops) {
Yoshimi Ichiyanagi09db28b2006-12-29 16:49:41 -08002496 printk(KERN_ERR "kvm: already loaded the other module\n");
2497 return -EEXIST;
2498 }
2499
Yoshimi Ichiyanagie097f352007-01-05 16:36:24 -08002500 if (!ops->cpu_has_kvm_support()) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08002501 printk(KERN_ERR "kvm: no hardware support\n");
2502 return -EOPNOTSUPP;
2503 }
Yoshimi Ichiyanagie097f352007-01-05 16:36:24 -08002504 if (ops->disabled_by_bios()) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08002505 printk(KERN_ERR "kvm: disabled by bios\n");
2506 return -EOPNOTSUPP;
2507 }
2508
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03002509 kvm_x86_ops = ops;
Yoshimi Ichiyanagie097f352007-01-05 16:36:24 -08002510
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03002511 r = kvm_x86_ops->hardware_setup();
Avi Kivity6aa8b732006-12-10 02:21:36 -08002512 if (r < 0)
Avi Kivityca45aaa2007-03-01 19:21:03 +02002513 goto out;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002514
Yang, Sheng002c7f72007-07-31 14:23:01 +03002515 for_each_online_cpu(cpu) {
2516 smp_call_function_single(cpu,
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03002517 kvm_x86_ops->check_processor_compatibility,
Yang, Sheng002c7f72007-07-31 14:23:01 +03002518 &r, 0, 1);
2519 if (r < 0)
2520 goto out_free_0;
2521 }
2522
Avi Kivity1b6c0162007-05-24 13:03:52 +03002523 on_each_cpu(hardware_enable, NULL, 0, 1);
Avi Kivity774c47f2007-02-12 00:54:47 -08002524 r = register_cpu_notifier(&kvm_cpu_notifier);
2525 if (r)
2526 goto out_free_1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002527 register_reboot_notifier(&kvm_reboot_notifier);
2528
Avi Kivity59ae6c62007-02-12 00:54:48 -08002529 r = sysdev_class_register(&kvm_sysdev_class);
2530 if (r)
2531 goto out_free_2;
2532
2533 r = sysdev_register(&kvm_sysdev);
2534 if (r)
2535 goto out_free_3;
2536
Rusty Russellc16f8622007-07-30 21:12:19 +10002537 /* A kmem cache lets us meet the alignment requirements of fx_save. */
2538 kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
2539 __alignof__(struct kvm_vcpu), 0, 0);
2540 if (!kvm_vcpu_cache) {
2541 r = -ENOMEM;
2542 goto out_free_4;
2543 }
2544
Avi Kivity6aa8b732006-12-10 02:21:36 -08002545 kvm_chardev_ops.owner = module;
2546
2547 r = misc_register(&kvm_dev);
2548 if (r) {
Mike Dayd77c26f2007-10-08 09:02:08 -04002549 printk(KERN_ERR "kvm: misc device register failed\n");
Avi Kivity6aa8b732006-12-10 02:21:36 -08002550 goto out_free;
2551 }
2552
Avi Kivity15ad7142007-07-11 18:17:21 +03002553 kvm_preempt_ops.sched_in = kvm_sched_in;
2554 kvm_preempt_ops.sched_out = kvm_sched_out;
2555
Avi Kivityc7addb92007-09-16 18:58:32 +02002556 kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
2557
2558 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002559
2560out_free:
Rusty Russellc16f8622007-07-30 21:12:19 +10002561 kmem_cache_destroy(kvm_vcpu_cache);
2562out_free_4:
Avi Kivity59ae6c62007-02-12 00:54:48 -08002563 sysdev_unregister(&kvm_sysdev);
2564out_free_3:
2565 sysdev_class_unregister(&kvm_sysdev_class);
2566out_free_2:
Avi Kivity6aa8b732006-12-10 02:21:36 -08002567 unregister_reboot_notifier(&kvm_reboot_notifier);
Avi Kivity774c47f2007-02-12 00:54:47 -08002568 unregister_cpu_notifier(&kvm_cpu_notifier);
2569out_free_1:
Avi Kivity1b6c0162007-05-24 13:03:52 +03002570 on_each_cpu(hardware_disable, NULL, 0, 1);
Yang, Sheng002c7f72007-07-31 14:23:01 +03002571out_free_0:
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03002572 kvm_x86_ops->hardware_unsetup();
Avi Kivityca45aaa2007-03-01 19:21:03 +02002573out:
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03002574 kvm_x86_ops = NULL;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002575 return r;
2576}
Mike Dayd77c26f2007-10-08 09:02:08 -04002577EXPORT_SYMBOL_GPL(kvm_init_x86);
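/*
 * kvm_init_x86() is invoked from the vendor modules (kvm-intel and kvm-amd),
 * each passing its own struct kvm_x86_ops; only one of them can be active at
 * a time, as enforced by the check at the top of the function.
 */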
Avi Kivity6aa8b732006-12-10 02:21:36 -08002578
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03002579void kvm_exit_x86(void)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002580{
2581 misc_deregister(&kvm_dev);
Rusty Russellc16f8622007-07-30 21:12:19 +10002582 kmem_cache_destroy(kvm_vcpu_cache);
Avi Kivity59ae6c62007-02-12 00:54:48 -08002583 sysdev_unregister(&kvm_sysdev);
2584 sysdev_class_unregister(&kvm_sysdev_class);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002585 unregister_reboot_notifier(&kvm_reboot_notifier);
Avi Kivity59ae6c62007-02-12 00:54:48 -08002586 unregister_cpu_notifier(&kvm_cpu_notifier);
Avi Kivity1b6c0162007-05-24 13:03:52 +03002587 on_each_cpu(hardware_disable, NULL, 0, 1);
Christian Ehrhardtcbdd1be2007-09-09 15:41:59 +03002588 kvm_x86_ops->hardware_unsetup();
2589 kvm_x86_ops = NULL;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002590}
Mike Dayd77c26f2007-10-08 09:02:08 -04002591EXPORT_SYMBOL_GPL(kvm_exit_x86);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002592
2593static __init int kvm_init(void)
2594{
Avi Kivity37e29d92007-02-20 14:07:37 +02002595 int r;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002596
Avi Kivityb5a33a72007-04-15 16:31:09 +03002597 r = kvm_mmu_module_init();
2598 if (r)
2599 goto out4;
2600
Avi Kivity6aa8b732006-12-10 02:21:36 -08002601 kvm_init_debug();
2602
Carsten Otte043405e2007-10-10 17:16:19 +02002603 kvm_arch_init();
Michael Riepebf591b22006-12-22 01:05:36 -08002604
Izik Eiduscea7bb22007-10-17 19:17:48 +02002605 bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
Mike Dayd77c26f2007-10-08 09:02:08 -04002606
2607 if (bad_page == NULL) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08002608 r = -ENOMEM;
2609 goto out;
2610 }
2611
Avi Kivity58e690e2007-02-26 16:29:43 +02002612 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002613
2614out:
2615 kvm_exit_debug();
Avi Kivityb5a33a72007-04-15 16:31:09 +03002616 kvm_mmu_module_exit();
2617out4:
Avi Kivity6aa8b732006-12-10 02:21:36 -08002618 return r;
2619}
2620
2621static __exit void kvm_exit(void)
2622{
2623 kvm_exit_debug();
Izik Eiduscea7bb22007-10-17 19:17:48 +02002624 __free_page(bad_page);
Avi Kivityb5a33a72007-04-15 16:31:09 +03002625 kvm_mmu_module_exit();
Avi Kivity6aa8b732006-12-10 02:21:36 -08002626}
2627
2628module_init(kvm_init)
2629module_exit(kvm_exit)