KVM: x86: Rework user space NMI injection as KVM_CAP_USER_NMI
There is no point in doing the ready_for_nmi_injection/
request_nmi_window dance with user space. First, we don't do this for
in-kernel irqchip anyway, while the code path is the same as for user
space irqchip mode. And second, there is nothing to loose if a pending
NMI is overwritten by another one (in contrast to IRQs where we have to
save the number). Actually, there is even the risk of raising spurious
NMIs this way because the reason for the held-back NMI might already be
handled while processing the first one.
Therefore this patch creates a simplified user space NMI injection
interface, exporting it under KVM_CAP_USER_NMI and dropping the old
KVM_CAP_NMI capability. And this time we also take care to provide the
interface only on archs supporting NMIs via KVM (right now only x86).
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 487e1dc..6259d74 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2498,15 +2498,13 @@
}
if (vcpu->arch.nmi_injected) {
vmx_inject_nmi(vcpu);
- if (vcpu->arch.nmi_pending || kvm_run->request_nmi_window)
+ if (vcpu->arch.nmi_pending)
enable_nmi_window(vcpu);
else if (vcpu->arch.irq_summary
|| kvm_run->request_interrupt_window)
enable_irq_window(vcpu);
return;
}
- if (!vcpu->arch.nmi_window_open || kvm_run->request_nmi_window)
- enable_nmi_window(vcpu);
if (vcpu->arch.interrupt_window_open) {
if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
@@ -3040,14 +3038,6 @@
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
++vcpu->stat.nmi_window_exits;
- /*
- * If the user space waits to inject a NMI, exit as soon as possible
- */
- if (kvm_run->request_nmi_window && !vcpu->arch.nmi_pending) {
- kvm_run->exit_reason = KVM_EXIT_NMI_WINDOW_OPEN;
- return 0;
- }
-
return 1;
}
@@ -3162,7 +3152,7 @@
vmx->soft_vnmi_blocked = 0;
vcpu->arch.nmi_window_open = 1;
} else if (vmx->vnmi_blocked_time > 1000000000LL &&
- (kvm_run->request_nmi_window || vcpu->arch.nmi_pending)) {
+ vcpu->arch.nmi_pending) {
/*
* This CPU don't support us in finding the end of an
* NMI-blocked window if the guest runs with IRQs
@@ -3175,16 +3165,6 @@
vmx->soft_vnmi_blocked = 0;
vmx->vcpu.arch.nmi_window_open = 1;
}
-
- /*
- * If the user space waits to inject an NNI, exit ASAP
- */
- if (vcpu->arch.nmi_window_open && kvm_run->request_nmi_window
- && !vcpu->arch.nmi_pending) {
- kvm_run->exit_reason = KVM_EXIT_NMI_WINDOW_OPEN;
- ++vcpu->stat.nmi_window_exits;
- return 0;
- }
}
if (exit_reason < kvm_vmx_max_exit_handlers
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 10302d3..0e6aa81 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2887,37 +2887,18 @@
(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
}
-/*
- * Check if userspace requested a NMI window, and that the NMI window
- * is open.
- *
- * No need to exit to userspace if we already have a NMI queued.
- */
-static int dm_request_for_nmi_injection(struct kvm_vcpu *vcpu,
- struct kvm_run *kvm_run)
-{
- return (!vcpu->arch.nmi_pending &&
- kvm_run->request_nmi_window &&
- vcpu->arch.nmi_window_open);
-}
-
static void post_kvm_run_save(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
- if (irqchip_in_kernel(vcpu->kvm)) {
+ if (irqchip_in_kernel(vcpu->kvm))
kvm_run->ready_for_interrupt_injection = 1;
- kvm_run->ready_for_nmi_injection = 1;
- } else {
+ else
kvm_run->ready_for_interrupt_injection =
(vcpu->arch.interrupt_window_open &&
vcpu->arch.irq_summary == 0);
- kvm_run->ready_for_nmi_injection =
- (vcpu->arch.nmi_window_open &&
- vcpu->arch.nmi_pending == 0);
- }
}
static void vapic_enter(struct kvm_vcpu *vcpu)
@@ -3093,11 +3074,6 @@
}
if (r > 0) {
- if (dm_request_for_nmi_injection(vcpu, kvm_run)) {
- r = -EINTR;
- kvm_run->exit_reason = KVM_EXIT_NMI;
- ++vcpu->stat.request_nmi_exits;
- }
if (dm_request_for_irq_injection(vcpu, kvm_run)) {
r = -EINTR;
kvm_run->exit_reason = KVM_EXIT_INTR;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 4880776..35525ac 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -84,21 +84,18 @@
#define KVM_EXIT_S390_RESET 14
#define KVM_EXIT_DCR 15
#define KVM_EXIT_NMI 16
-#define KVM_EXIT_NMI_WINDOW_OPEN 17
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
/* in */
__u8 request_interrupt_window;
- __u8 request_nmi_window;
- __u8 padding1[6];
+ __u8 padding1[7];
/* out */
__u32 exit_reason;
__u8 ready_for_interrupt_injection;
__u8 if_flag;
- __u8 ready_for_nmi_injection;
- __u8 padding2;
+ __u8 padding2[2];
/* in (pre_kvm_run), out (post_kvm_run) */
__u64 cr8;
@@ -391,12 +388,14 @@
#define KVM_CAP_DEVICE_ASSIGNMENT 17
#endif
#define KVM_CAP_IOMMU 18
-#define KVM_CAP_NMI 19
#if defined(CONFIG_X86)
#define KVM_CAP_DEVICE_MSI 20
#endif
/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
+#if defined(CONFIG_X86)
+#define KVM_CAP_USER_NMI 22
+#endif
/*
* ioctls for VM fds