KVM: x86 emulator: prefetch up to 15 bytes of the instruction executed

Instead of fetching one byte at a time, prefetch 15 bytes (or until the next
page boundary) to avoid guest page table walks.

Signed-off-by: Avi Kivity <avi@qumranet.com>
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 8e2162f..6e7f774 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -414,8 +414,7 @@
 /* Fetch next part of the instruction being emulated. */
 #define insn_fetch(_type, _size, _eip)                                  \
 ({	unsigned long _x;						\
-	rc = ops->read_std((unsigned long)(_eip) + ctxt->cs_base, &_x,	\
-			   (_size), ctxt->vcpu);			\
+	rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size));		\
 	if (rc != 0)							\
 		goto done;						\
 	(_eip) += (_size);						\
@@ -446,6 +445,41 @@
 		register_address_increment(c->eip, rel);		\
 	} while (0)
 
+static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
+			      struct x86_emulate_ops *ops,
+			      unsigned long linear, u8 *dest)
+{
+	struct fetch_cache *fc = &ctxt->decode.fetch;
+	int rc;
+	int size;
+
+	if (linear < fc->start || linear >= fc->end) {
+		size = min(15UL, PAGE_SIZE - offset_in_page(linear));
+		rc = ops->read_std(linear, fc->data, size, ctxt->vcpu);
+		if (rc)
+			return rc;
+		fc->start = linear;
+		fc->end = linear + size;
+	}
+	*dest = fc->data[linear - fc->start];
+	return 0;
+}
+
+static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
+			 struct x86_emulate_ops *ops,
+			 unsigned long eip, void *dest, unsigned size)
+{
+	int rc = 0;
+
+	eip += ctxt->cs_base;
+	while (size--) {
+		rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
 /*
  * Given the 'reg' portion of a ModRM byte, and a register block, return a
  * pointer into the block that addresses the relevant register.