[IA64] Put the space for cpu0 per-cpu area into .data section

Initial fix for making sure that we can access percpu variables
in all C code (commit: 10617bbe84628eb18ab5f723d3ba35005adde143)
inadvertantly allocated the memory in the "percpu" section of
the vmlinux ELF executable.  This confused kexec/dump.

Signed-off-by: Tony Luck <tony.luck@intel.com>
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 8bdea8e..66e491d 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -367,16 +367,17 @@
 	;;
 #else
 (isAP)	br.few 2f
-	mov r20=r19
-	sub r19=r19,r18
+	movl r20=__cpu0_per_cpu
 	;;
 	shr.u r18=r18,3
 1:
-	ld8 r21=[r20],8;;
-	st8[r19]=r21,8
+	ld8 r21=[r19],8;;
+	st8[r20]=r21,8
 	adds r18=-1,r18;;
 	cmp4.lt p7,p6=0,r18
 (p7)	br.cond.dptk.few 1b
+	mov r19=r20
+	;;
 2:
 #endif
 	tpa r19=r19