x86-64: Move kernelstack from PDA to per-cpu.

Also clean up PER_CPU_VAR usage in xen-asm_64.S

tj: * remove now unused stack_thread_info()
    * s/kernelstack/kernel_stack/
    * added FIXME comment in xen-asm_64.S

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b50e38d1..06b6290 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -889,6 +889,10 @@
 	per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
 #endif
 
+DEFINE_PER_CPU(unsigned long, kernel_stack) =
+	(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(kernel_stack);
+
 void __cpuinit pda_init(int cpu)
 {
 	struct x8664_pda *pda = cpu_pda(cpu);
@@ -900,8 +904,6 @@
 	load_pda_offset(cpu);
 
 	pda->irqcount = -1;
-	pda->kernelstack = (unsigned long)stack_thread_info() -
-				 PDA_STACKOFFSET + THREAD_SIZE;
 
 	if (cpu != 0) {
 		if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)