x86: load new GDT after setting up boot cpu per-cpu area

Impact: sync 32-bit and 64-bit code

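Merge load_gs_base() into switch_to_new_gdt(), so that a single call
reloads both the GDT and the per-cpu segment base (%fs on 32-bit,
MSR_GS_BASE on 64-bit).  Call it for the boot cpu as soon as its new
per-cpu area is set up.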

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 67e30c8..0c766b8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -258,12 +258,17 @@
 void switch_to_new_gdt(void)
 {
 	struct desc_ptr gdt_descr;
+	int cpu = smp_processor_id();
 
-	gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
+	gdt_descr.address = (long)get_cpu_gdt_table(cpu);
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
+	/* Reload the per-cpu base (%fs on 32-bit, MSR_GS_BASE on 64-bit) */
 #ifdef CONFIG_X86_32
-	asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
+	loadsegment(fs, __KERNEL_PERCPU);
+#else
+	loadsegment(gs, 0);
+	wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
 #endif
 }
 
@@ -968,10 +973,6 @@
 	struct task_struct *me;
 	int i;
 
-	loadsegment(fs, 0);
-	loadsegment(gs, 0);
-	load_gs_base(cpu);
-
 #ifdef CONFIG_NUMA
 	if (cpu != 0 && percpu_read(node_number) == 0 &&
 	    cpu_to_node(cpu) != NUMA_NO_NODE)
@@ -993,6 +994,8 @@
 	 */
 
 	switch_to_new_gdt();
+	loadsegment(fs, 0);
+
 	load_idt((const struct desc_ptr *)&idt_descr);
 
 	memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index bcca3a7..4caa78d 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -113,13 +113,13 @@
 		per_cpu(x86_cpu_to_node_map, cpu) =
 				early_per_cpu_map(x86_cpu_to_node_map, cpu);
 #endif
+#endif
 		/*
 		 * Up to this point, the boot CPU has been using .data.init
-		 * area.  Reload %gs offset for the boot CPU.
+		 * area.  Reload any changed state for the boot CPU.
 		 */
 		if (cpu == boot_cpu_id)
-			load_gs_base(cpu);
-#endif
+			switch_to_new_gdt();
 
 		DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
 	}