| Marcelo Tosatti | 0cf1bfd | 2008-02-22 12:21:36 -0500 | [diff] [blame] | 1 | /* | 
|  | 2 | * KVM paravirt_ops implementation | 
|  | 3 | * | 
|  | 4 | * This program is free software; you can redistribute it and/or modify | 
|  | 5 | * it under the terms of the GNU General Public License as published by | 
|  | 6 | * the Free Software Foundation; either version 2 of the License, or | 
|  | 7 | * (at your option) any later version. | 
|  | 8 | * | 
|  | 9 | * This program is distributed in the hope that it will be useful, | 
|  | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|  | 12 | * GNU General Public License for more details. | 
|  | 13 | * | 
|  | 14 | * You should have received a copy of the GNU General Public License | 
|  | 15 | * along with this program; if not, write to the Free Software | 
|  | 16 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. | 
|  | 17 | * | 
|  | 18 | * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | 
|  | 19 | * Copyright IBM Corporation, 2007 | 
|  | 20 | *   Authors: Anthony Liguori <aliguori@us.ibm.com> | 
|  | 21 | */ | 
|  | 22 |  | 
|  | 23 | #include <linux/module.h> | 
|  | 24 | #include <linux/kernel.h> | 
|  | 25 | #include <linux/kvm_para.h> | 
|  | 26 | #include <linux/cpu.h> | 
|  | 27 | #include <linux/mm.h> | 
| Marcelo Tosatti | 1da8a77 | 2008-02-22 12:21:37 -0500 | [diff] [blame] | 28 | #include <linux/highmem.h> | 
| Marcelo Tosatti | 096d14a | 2008-02-22 12:21:38 -0500 | [diff] [blame] | 29 | #include <linux/hardirq.h> | 
|  | 30 |  | 
|  | 31 | #define MMU_QUEUE_SIZE 1024 | 
|  | 32 |  | 
|  | 33 | struct kvm_para_state { | 
|  | 34 | u8 mmu_queue[MMU_QUEUE_SIZE]; | 
|  | 35 | int mmu_queue_len; | 
|  | 36 | enum paravirt_lazy_mode mode; | 
|  | 37 | }; | 
|  | 38 |  | 
|  | 39 | static DEFINE_PER_CPU(struct kvm_para_state, para_state); | 
|  | 40 |  | 
|  | 41 | static struct kvm_para_state *kvm_para_state(void) | 
|  | 42 | { | 
|  | 43 | return &per_cpu(para_state, raw_smp_processor_id()); | 
|  | 44 | } | 
| Marcelo Tosatti | 0cf1bfd | 2008-02-22 12:21:36 -0500 | [diff] [blame] | 45 |  | 
/*
 * io_delay hook used when the host advertises KVM_FEATURE_NOP_IO_DELAY.
 */
static void kvm_io_delay(void)
{
	/* Intentionally empty: no "IO delay" is needed on KVM. */
}
|  | 52 |  | 
| Marcelo Tosatti | 1da8a77 | 2008-02-22 12:21:37 -0500 | [diff] [blame] | 53 | static void kvm_mmu_op(void *buffer, unsigned len) | 
|  | 54 | { | 
|  | 55 | int r; | 
|  | 56 | unsigned long a1, a2; | 
|  | 57 |  | 
|  | 58 | do { | 
|  | 59 | a1 = __pa(buffer); | 
|  | 60 | a2 = 0;   /* on i386 __pa() always returns <4G */ | 
|  | 61 | r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2); | 
|  | 62 | buffer += r; | 
|  | 63 | len -= r; | 
|  | 64 | } while (len); | 
|  | 65 | } | 
|  | 66 |  | 
| Marcelo Tosatti | 096d14a | 2008-02-22 12:21:38 -0500 | [diff] [blame] | 67 | static void mmu_queue_flush(struct kvm_para_state *state) | 
|  | 68 | { | 
|  | 69 | if (state->mmu_queue_len) { | 
|  | 70 | kvm_mmu_op(state->mmu_queue, state->mmu_queue_len); | 
|  | 71 | state->mmu_queue_len = 0; | 
|  | 72 | } | 
|  | 73 | } | 
|  | 74 |  | 
|  | 75 | static void kvm_deferred_mmu_op(void *buffer, int len) | 
|  | 76 | { | 
|  | 77 | struct kvm_para_state *state = kvm_para_state(); | 
|  | 78 |  | 
|  | 79 | if (state->mode != PARAVIRT_LAZY_MMU) { | 
|  | 80 | kvm_mmu_op(buffer, len); | 
|  | 81 | return; | 
|  | 82 | } | 
|  | 83 | if (state->mmu_queue_len + len > sizeof state->mmu_queue) | 
|  | 84 | mmu_queue_flush(state); | 
|  | 85 | memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len); | 
|  | 86 | state->mmu_queue_len += len; | 
|  | 87 | } | 
|  | 88 |  | 
| Marcelo Tosatti | 1da8a77 | 2008-02-22 12:21:37 -0500 | [diff] [blame] | 89 | static void kvm_mmu_write(void *dest, u64 val) | 
|  | 90 | { | 
|  | 91 | __u64 pte_phys; | 
|  | 92 | struct kvm_mmu_op_write_pte wpte; | 
|  | 93 |  | 
|  | 94 | #ifdef CONFIG_HIGHPTE | 
|  | 95 | struct page *page; | 
|  | 96 | unsigned long dst = (unsigned long) dest; | 
|  | 97 |  | 
|  | 98 | page = kmap_atomic_to_page(dest); | 
|  | 99 | pte_phys = page_to_pfn(page); | 
|  | 100 | pte_phys <<= PAGE_SHIFT; | 
|  | 101 | pte_phys += (dst & ~(PAGE_MASK)); | 
|  | 102 | #else | 
|  | 103 | pte_phys = (unsigned long)__pa(dest); | 
|  | 104 | #endif | 
|  | 105 | wpte.header.op = KVM_MMU_OP_WRITE_PTE; | 
|  | 106 | wpte.pte_val = val; | 
|  | 107 | wpte.pte_phys = pte_phys; | 
|  | 108 |  | 
| Marcelo Tosatti | 096d14a | 2008-02-22 12:21:38 -0500 | [diff] [blame] | 109 | kvm_deferred_mmu_op(&wpte, sizeof wpte); | 
| Marcelo Tosatti | 1da8a77 | 2008-02-22 12:21:37 -0500 | [diff] [blame] | 110 | } | 
|  | 111 |  | 
|  | 112 | /* | 
|  | 113 | * We only need to hook operations that are MMU writes.  We hook these so that | 
|  | 114 | * we can use lazy MMU mode to batch these operations.  We could probably | 
|  | 115 | * improve the performance of the host code if we used some of the information | 
|  | 116 | * here to simplify processing of batched writes. | 
|  | 117 | */ | 
|  | 118 | static void kvm_set_pte(pte_t *ptep, pte_t pte) | 
|  | 119 | { | 
|  | 120 | kvm_mmu_write(ptep, pte_val(pte)); | 
|  | 121 | } | 
|  | 122 |  | 
|  | 123 | static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr, | 
|  | 124 | pte_t *ptep, pte_t pte) | 
|  | 125 | { | 
|  | 126 | kvm_mmu_write(ptep, pte_val(pte)); | 
|  | 127 | } | 
|  | 128 |  | 
|  | 129 | static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd) | 
|  | 130 | { | 
|  | 131 | kvm_mmu_write(pmdp, pmd_val(pmd)); | 
|  | 132 | } | 
|  | 133 |  | 
|  | 134 | #if PAGETABLE_LEVELS >= 3 | 
|  | 135 | #ifdef CONFIG_X86_PAE | 
|  | 136 | static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte) | 
|  | 137 | { | 
|  | 138 | kvm_mmu_write(ptep, pte_val(pte)); | 
|  | 139 | } | 
|  | 140 |  | 
|  | 141 | static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr, | 
|  | 142 | pte_t *ptep, pte_t pte) | 
|  | 143 | { | 
|  | 144 | kvm_mmu_write(ptep, pte_val(pte)); | 
|  | 145 | } | 
|  | 146 |  | 
|  | 147 | static void kvm_pte_clear(struct mm_struct *mm, | 
|  | 148 | unsigned long addr, pte_t *ptep) | 
|  | 149 | { | 
|  | 150 | kvm_mmu_write(ptep, 0); | 
|  | 151 | } | 
|  | 152 |  | 
|  | 153 | static void kvm_pmd_clear(pmd_t *pmdp) | 
|  | 154 | { | 
|  | 155 | kvm_mmu_write(pmdp, 0); | 
|  | 156 | } | 
|  | 157 | #endif | 
|  | 158 |  | 
|  | 159 | static void kvm_set_pud(pud_t *pudp, pud_t pud) | 
|  | 160 | { | 
|  | 161 | kvm_mmu_write(pudp, pud_val(pud)); | 
|  | 162 | } | 
|  | 163 |  | 
|  | 164 | #if PAGETABLE_LEVELS == 4 | 
|  | 165 | static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd) | 
|  | 166 | { | 
|  | 167 | kvm_mmu_write(pgdp, pgd_val(pgd)); | 
|  | 168 | } | 
|  | 169 | #endif | 
|  | 170 | #endif /* PAGETABLE_LEVELS >= 3 */ | 
|  | 171 |  | 
|  | 172 | static void kvm_flush_tlb(void) | 
|  | 173 | { | 
|  | 174 | struct kvm_mmu_op_flush_tlb ftlb = { | 
|  | 175 | .header.op = KVM_MMU_OP_FLUSH_TLB, | 
|  | 176 | }; | 
|  | 177 |  | 
| Marcelo Tosatti | 096d14a | 2008-02-22 12:21:38 -0500 | [diff] [blame] | 178 | kvm_deferred_mmu_op(&ftlb, sizeof ftlb); | 
| Marcelo Tosatti | 1da8a77 | 2008-02-22 12:21:37 -0500 | [diff] [blame] | 179 | } | 
|  | 180 |  | 
| Eduardo Habkost | f863993 | 2008-07-30 18:32:27 -0300 | [diff] [blame] | 181 | static void kvm_release_pt(unsigned long pfn) | 
| Marcelo Tosatti | 1da8a77 | 2008-02-22 12:21:37 -0500 | [diff] [blame] | 182 | { | 
|  | 183 | struct kvm_mmu_op_release_pt rpt = { | 
|  | 184 | .header.op = KVM_MMU_OP_RELEASE_PT, | 
|  | 185 | .pt_phys = (u64)pfn << PAGE_SHIFT, | 
|  | 186 | }; | 
|  | 187 |  | 
|  | 188 | kvm_mmu_op(&rpt, sizeof rpt); | 
|  | 189 | } | 
|  | 190 |  | 
| Marcelo Tosatti | 096d14a | 2008-02-22 12:21:38 -0500 | [diff] [blame] | 191 | static void kvm_enter_lazy_mmu(void) | 
|  | 192 | { | 
|  | 193 | struct kvm_para_state *state = kvm_para_state(); | 
|  | 194 |  | 
|  | 195 | paravirt_enter_lazy_mmu(); | 
|  | 196 | state->mode = paravirt_get_lazy_mode(); | 
|  | 197 | } | 
|  | 198 |  | 
|  | 199 | static void kvm_leave_lazy_mmu(void) | 
|  | 200 | { | 
|  | 201 | struct kvm_para_state *state = kvm_para_state(); | 
|  | 202 |  | 
|  | 203 | mmu_queue_flush(state); | 
|  | 204 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | 
|  | 205 | state->mode = paravirt_get_lazy_mode(); | 
|  | 206 | } | 
|  | 207 |  | 
| Marcelo Tosatti | 0cf1bfd | 2008-02-22 12:21:36 -0500 | [diff] [blame] | 208 | static void paravirt_ops_setup(void) | 
|  | 209 | { | 
|  | 210 | pv_info.name = "KVM"; | 
|  | 211 | pv_info.paravirt_enabled = 1; | 
|  | 212 |  | 
|  | 213 | if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) | 
|  | 214 | pv_cpu_ops.io_delay = kvm_io_delay; | 
|  | 215 |  | 
| Marcelo Tosatti | 1da8a77 | 2008-02-22 12:21:37 -0500 | [diff] [blame] | 216 | if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) { | 
|  | 217 | pv_mmu_ops.set_pte = kvm_set_pte; | 
|  | 218 | pv_mmu_ops.set_pte_at = kvm_set_pte_at; | 
|  | 219 | pv_mmu_ops.set_pmd = kvm_set_pmd; | 
|  | 220 | #if PAGETABLE_LEVELS >= 3 | 
|  | 221 | #ifdef CONFIG_X86_PAE | 
|  | 222 | pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic; | 
|  | 223 | pv_mmu_ops.set_pte_present = kvm_set_pte_present; | 
|  | 224 | pv_mmu_ops.pte_clear = kvm_pte_clear; | 
|  | 225 | pv_mmu_ops.pmd_clear = kvm_pmd_clear; | 
|  | 226 | #endif | 
|  | 227 | pv_mmu_ops.set_pud = kvm_set_pud; | 
|  | 228 | #if PAGETABLE_LEVELS == 4 | 
|  | 229 | pv_mmu_ops.set_pgd = kvm_set_pgd; | 
|  | 230 | #endif | 
|  | 231 | #endif | 
|  | 232 | pv_mmu_ops.flush_tlb_user = kvm_flush_tlb; | 
|  | 233 | pv_mmu_ops.release_pte = kvm_release_pt; | 
|  | 234 | pv_mmu_ops.release_pmd = kvm_release_pt; | 
|  | 235 | pv_mmu_ops.release_pud = kvm_release_pt; | 
| Marcelo Tosatti | 096d14a | 2008-02-22 12:21:38 -0500 | [diff] [blame] | 236 |  | 
|  | 237 | pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; | 
|  | 238 | pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; | 
| Marcelo Tosatti | 1da8a77 | 2008-02-22 12:21:37 -0500 | [diff] [blame] | 239 | } | 
| Marcelo Tosatti | 0cf1bfd | 2008-02-22 12:21:36 -0500 | [diff] [blame] | 240 | } | 
|  | 241 |  | 
|  | 242 | void __init kvm_guest_init(void) | 
|  | 243 | { | 
|  | 244 | if (!kvm_para_available()) | 
|  | 245 | return; | 
|  | 246 |  | 
|  | 247 | paravirt_ops_setup(); | 
|  | 248 | } |