[PATCH] i386: Convert i386 PDA code to use %fs
Convert the PDA code to use %fs rather than %gs as the segment for
per-processor data. This is because some processors show a small but
measurable performance gain for reloading a NULL segment selector (as %fs
generally is in user-space) versus a non-NULL one (as %gs generally is).
On modern processors the difference is very small, perhaps undetectable.
Some old AMD "K6 3D+" processors are noticably slower when %fs is used
rather than %gs; I have no idea why this might be, but I think they're
sufficiently rare that it doesn't matter much.
This patch also fixes the math emulator, which had not been adjusted to
match the changed struct pt_regs.
[frederik.deweerdt@gmail.com: fixit with gdb]
[mingo@elte.hu: Fix KVM too]
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Ian Campbell <Ian.Campbell@XenSource.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Zachary Amsden <zach@vmware.com>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Frederik Deweerdt <frederik.deweerdt@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
index be2f96e..d1b8f2b 100644
--- a/arch/i386/kernel/vm86.c
+++ b/arch/i386/kernel/vm86.c
@@ -96,12 +96,12 @@
{
int ret = 0;
- /* kernel_vm86_regs is missing xfs, so copy everything up to
- (but not including) xgs, and then rest after xgs. */
- ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.xgs));
- ret += copy_to_user(&user->__null_gs, ®s->pt.xgs,
+ /* kernel_vm86_regs is missing xgs, so copy everything up to
+ (but not including) orig_eax, and then rest including orig_eax. */
+ ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_eax));
+ ret += copy_to_user(&user->orig_eax, ®s->pt.orig_eax,
sizeof(struct kernel_vm86_regs) -
- offsetof(struct kernel_vm86_regs, pt.xgs));
+ offsetof(struct kernel_vm86_regs, pt.orig_eax));
return ret;
}
@@ -113,12 +113,13 @@
{
int ret = 0;
- ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.xgs));
- ret += copy_from_user(®s->pt.xgs, &user->__null_gs,
+ /* copy eax-xfs inclusive */
+ ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_eax));
+ /* copy orig_eax-__gsh+extra */
+ ret += copy_from_user(®s->pt.orig_eax, &user->orig_eax,
sizeof(struct kernel_vm86_regs) -
- offsetof(struct kernel_vm86_regs, pt.xgs) +
+ offsetof(struct kernel_vm86_regs, pt.orig_eax) +
extra);
-
return ret;
}
@@ -157,8 +158,8 @@
ret = KVM86->regs32;
- loadsegment(fs, current->thread.saved_fs);
- ret->xgs = current->thread.saved_gs;
+ ret->xfs = current->thread.saved_fs;
+ loadsegment(gs, current->thread.saved_gs);
return ret;
}
@@ -285,9 +286,9 @@
*/
info->regs.pt.xds = 0;
info->regs.pt.xes = 0;
- info->regs.pt.xgs = 0;
+ info->regs.pt.xfs = 0;
-/* we are clearing fs later just before "jmp resume_userspace",
+/* we are clearing gs later just before "jmp resume_userspace",
* because it is not saved/restored.
*/
@@ -321,8 +322,8 @@
*/
info->regs32->eax = 0;
tsk->thread.saved_esp0 = tsk->thread.esp0;
- savesegment(fs, tsk->thread.saved_fs);
- tsk->thread.saved_gs = info->regs32->xgs;
+ tsk->thread.saved_fs = info->regs32->xfs;
+ savesegment(gs, tsk->thread.saved_gs);
tss = &per_cpu(init_tss, get_cpu());
tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
@@ -342,7 +343,7 @@
__asm__ __volatile__(
"movl %0,%%esp\n\t"
"movl %1,%%ebp\n\t"
- "mov %2, %%fs\n\t"
+ "mov %2, %%gs\n\t"
"jmp resume_userspace"
: /* no outputs */
:"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0));