x86, apicv: add virtual interrupt delivery support

Virtual interrupt delivery avoids KVM to inject vAPIC interrupts
manually, which is fully taken care of by the hardware. This needs
some special awareness into existing interrupr injection path:

- for pending interrupt, instead of direct injection, we may need
  update architecture specific indicators before resuming to guest.

- A pending interrupt, which is masked by ISR, should be also
  considered in above update action, since hardware will decide
  when to inject it at right time. Current has_interrupt and
  get_interrupt only returns a valid vector from injection p.o.v.

Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index f69fc50..02b51dd 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -145,21 +145,51 @@
 	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
-static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
+void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
+				struct kvm_lapic_irq *irq,
+				u64 *eoi_exit_bitmap)
 {
-	u16 cid;
-	ldr >>= 32 - map->ldr_bits;
-	cid = (ldr >> map->cid_shift) & map->cid_mask;
+	struct kvm_lapic **dst;
+	struct kvm_apic_map *map;
+	unsigned long bitmap = 1;
+	int i;
 
-	BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
+	rcu_read_lock();
+	map = rcu_dereference(vcpu->kvm->arch.apic_map);
 
-	return cid;
-}
+	if (unlikely(!map)) {
+		__set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap);
+		goto out;
+	}
 
-static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
-{
-	ldr >>= (32 - map->ldr_bits);
-	return ldr & map->lid_mask;
+	if (irq->dest_mode == 0) { /* physical mode */
+		if (irq->delivery_mode == APIC_DM_LOWEST ||
+				irq->dest_id == 0xff) {
+			__set_bit(irq->vector,
+				  (unsigned long *)eoi_exit_bitmap);
+			goto out;
+		}
+		dst = &map->phys_map[irq->dest_id & 0xff];
+	} else {
+		u32 mda = irq->dest_id << (32 - map->ldr_bits);
+
+		dst = map->logical_map[apic_cluster_id(map, mda)];
+
+		bitmap = apic_logical_id(map, mda);
+	}
+
+	for_each_set_bit(i, &bitmap, 16) {
+		if (!dst[i])
+			continue;
+		if (dst[i]->vcpu == vcpu) {
+			__set_bit(irq->vector,
+				  (unsigned long *)eoi_exit_bitmap);
+			break;
+		}
+	}
+
+out:
+	rcu_read_unlock();
 }
 
 static void recalculate_apic_map(struct kvm *kvm)
@@ -225,6 +255,8 @@
 
 	if (old)
 		kfree_rcu(old, rcu);
+
+	kvm_ioapic_make_eoibitmap_request(kvm);
 }
 
 static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
@@ -340,6 +372,10 @@
 {
 	int result;
 
+	/*
+	 * Note that irr_pending is just a hint. It will be always
+	 * true with virtual interrupt delivery enabled.
+	 */
 	if (!apic->irr_pending)
 		return -1;
 
@@ -456,6 +492,8 @@
 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 {
 	int result;
+
+	/* Note that isr_count is always 1 with vid enabled */
 	if (!apic->isr_count)
 		return -1;
 	if (likely(apic->highest_isr_cache != -1))
@@ -735,6 +773,19 @@
 	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
 }
 
+static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
+{
+	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
+	    kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
+		int trigger_mode;
+		if (apic_test_vector(vector, apic->regs + APIC_TMR))
+			trigger_mode = IOAPIC_LEVEL_TRIG;
+		else
+			trigger_mode = IOAPIC_EDGE_TRIG;
+		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+	}
+}
+
 static int apic_set_eoi(struct kvm_lapic *apic)
 {
 	int vector = apic_find_highest_isr(apic);
@@ -751,19 +802,26 @@
 	apic_clear_isr(vector, apic);
 	apic_update_ppr(apic);
 
-	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
-	    kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
-		int trigger_mode;
-		if (apic_test_vector(vector, apic->regs + APIC_TMR))
-			trigger_mode = IOAPIC_LEVEL_TRIG;
-		else
-			trigger_mode = IOAPIC_EDGE_TRIG;
-		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
-	}
+	kvm_ioapic_send_eoi(apic, vector);
 	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
 	return vector;
 }
 
+/*
+ * this interface assumes a trap-like exit, which has already finished
+ * desired side effect including vISR and vPPR update.
+ */
+void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	trace_kvm_eoi(apic, vector);
+
+	kvm_ioapic_send_eoi(apic, vector);
+	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
+
 static void apic_send_ipi(struct kvm_lapic *apic)
 {
 	u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
@@ -1375,8 +1433,8 @@
 		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
 		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
 	}
-	apic->irr_pending = false;
-	apic->isr_count = 0;
+	apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm);
+	apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm);
 	apic->highest_isr_cache = -1;
 	update_divide_count(apic);
 	atomic_set(&apic->lapic_timer.pending, 0);
@@ -1591,8 +1649,10 @@
 	update_divide_count(apic);
 	start_apic_timer(apic);
 	apic->irr_pending = true;
-	apic->isr_count = count_vectors(apic->regs + APIC_ISR);
+	apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
+				1 : count_vectors(apic->regs + APIC_ISR);
 	apic->highest_isr_cache = -1;
+	kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 }