KVM: Do not communicate to userspace through cpu registers during PIO
Currently when passing the a PIO emulation request to userspace, we
rely on userspace updating %rax (on 'in' instructions) and %rsi/%rdi/%rcx
(on string instructions). This (a) requires two extra ioctls for getting
and setting the registers and (b) is unfriendly to non-x86 archs, when
they get kvm ports.
So fix by doing the register fixups in the kernel and passing to userspace
only an abstract description of the PIO to be done.
Signed-off-by: Avi Kivity <avi@qumranet.com>
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 901b8d9..59cbc5b 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -274,6 +274,7 @@
int mmio_size;
unsigned char mmio_data[8];
gpa_t mmio_phys_addr;
+ int pio_pending;
struct {
int active;
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 42be8a8..ff8bcfe 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -1504,6 +1504,44 @@
}
EXPORT_SYMBOL_GPL(save_msrs);
+static void complete_pio(struct kvm_vcpu *vcpu)
+{
+ struct kvm_io *io = &vcpu->run->io;
+ long delta;
+
+ kvm_arch_ops->cache_regs(vcpu);
+
+ if (!io->string) {
+ if (io->direction == KVM_EXIT_IO_IN)
+ memcpy(&vcpu->regs[VCPU_REGS_RAX], &io->value,
+ io->size);
+ } else {
+ delta = 1;
+ if (io->rep) {
+ delta *= io->count;
+ /*
+ * The size of the register should really depend on
+ * current address size.
+ */
+ vcpu->regs[VCPU_REGS_RCX] -= delta;
+ }
+ if (io->string_down)
+ delta = -delta;
+ delta *= io->size;
+ if (io->direction == KVM_EXIT_IO_IN)
+ vcpu->regs[VCPU_REGS_RDI] += delta;
+ else
+ vcpu->regs[VCPU_REGS_RSI] += delta;
+ }
+
+ vcpu->pio_pending = 0;
+ vcpu->run->io_completed = 0;
+
+ kvm_arch_ops->decache_regs(vcpu);
+
+ kvm_arch_ops->skip_emulated_instruction(vcpu);
+}
+
static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
@@ -1518,9 +1556,13 @@
kvm_run->emulated = 0;
}
- if (kvm_run->mmio_completed) {
- memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
- vcpu->mmio_read_completed = 1;
+ if (kvm_run->io_completed) {
+ if (vcpu->pio_pending)
+ complete_pio(vcpu);
+ else {
+ memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
+ vcpu->mmio_read_completed = 1;
+ }
}
vcpu->mmio_needed = 0;
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 6787f11..c35b8c8 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -1037,6 +1037,7 @@
kvm_run->io.size = ((io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT);
kvm_run->io.string = (io_info & SVM_IOIO_STR_MASK) != 0;
kvm_run->io.rep = (io_info & SVM_IOIO_REP_MASK) != 0;
+ kvm_run->io.count = 1;
if (kvm_run->io.string) {
unsigned addr_mask;
@@ -1056,6 +1057,7 @@
}
} else
kvm_run->io.value = vcpu->svm->vmcb->save.rax;
+ vcpu->pio_pending = 1;
return 0;
}
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index a721b60..4d5f40f 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1459,12 +1459,14 @@
= (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
kvm_run->io.rep = (exit_qualification & 32) != 0;
kvm_run->io.port = exit_qualification >> 16;
+ kvm_run->io.count = 1;
if (kvm_run->io.string) {
if (!get_io_count(vcpu, &kvm_run->io.count))
return 1;
kvm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS);
} else
kvm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */
+ vcpu->pio_pending = 1;
return 0;
}