KVM: Lazify fpu activation and deactivation

Defer fpu deactivation as much as possible - if the guest fpu is loaded, keep
it loaded until the next heavyweight exit (where we are forced to unload it).
This reduces unnecessary exits.

We also defer fpu activation on clts; while clts signals the intent to use the
fpu, we can't be sure the guest will actually use it.

Signed-off-by: Avi Kivity <avi@redhat.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 748b15d..1de2ad7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1509,8 +1509,8 @@
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	kvm_x86_ops->vcpu_put(vcpu);
 	kvm_put_guest_fpu(vcpu);
+	kvm_x86_ops->vcpu_put(vcpu);
 }
 
 static int is_efer_nx(void)
@@ -4006,6 +4006,10 @@
 			r = 0;
 			goto out;
 		}
+		if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
+			vcpu->fpu_active = 0;
+			kvm_x86_ops->fpu_deactivate(vcpu);
+		}
 	}
 
 	preempt_disable();
@@ -5075,6 +5079,7 @@
 	kvm_fx_save(&vcpu->arch.guest_fx_image);
 	kvm_fx_restore(&vcpu->arch.host_fx_image);
 	++vcpu->stat.fpu_reload;
+	set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
 }
 EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);