arm64: Process management

This patch adds support for thread creation and context switching. The
CPU-specific context switching code is introduced with the CPU support
patch (part of the arch/arm64/mm/proc.S file). AArch64 supports
ASID-tagged TLBs and the ASID can be either 8 or 16-bit wide (detectable
via the ASIDBits field of the ID_AA64MMFR0_EL1 register).

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Olof Johansson <olof@lixom.net>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
new file mode 100644
index 0000000..f22965e
--- /dev/null
+++ b/arch/arm64/kernel/process.c
@@ -0,0 +1,408 @@
+/*
+ * Based on arch/arm/kernel/process.c
+ *
+ * Original Copyright (C) 1995  Linus Torvalds
+ * Copyright (C) 1996-2000 Russell King - Converted to ARM.
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdarg.h>
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/user.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/interrupt.h>
+#include <linux/kallsyms.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/elfcore.h>
+#include <linux/pm.h>
+#include <linux/tick.h>
+#include <linux/utsname.h>
+#include <linux/uaccess.h>
+#include <linux/random.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/personality.h>
+#include <linux/notifier.h>
+
+#include <asm/compat.h>
+#include <asm/cacheflush.h>
+#include <asm/processor.h>
+#include <asm/stacktrace.h>
+#include <asm/fpsimd.h>
+
+/*
+ * Prepare this CPU for a soft restart: set up the reboot mappings, then
+ * flush the caches on either side of turning the D-cache off.
+ */
+static void setup_restart(void)
+{
+	/* The mm code may need to insert 1:1 mappings for soft boot to work. */
+	setup_mm_for_reboot();
+
+	/* Clean and invalidate caches */
+	flush_cache_all();
+
+	/* Turn D-cache off */
+	cpu_cache_off();
+
+	/* Push out any further dirty data, and ensure cache is empty */
+	flush_cache_all();
+}
+
+void soft_restart(unsigned long addr)
+{
+	setup_restart();	/* reboot mappings, cache flush, D-cache off */
+	cpu_reset(addr);	/* then pass @addr to the CPU reset code */
+}
+
+/*
+ * Optional machine-specific hooks; callers below test them for NULL first.
+ */
+void (*pm_power_off)(void);
+EXPORT_SYMBOL_GPL(pm_power_off);
+
+void (*pm_restart)(const char *cmd);
+EXPORT_SYMBOL_GPL(pm_restart);
+
+
+/*
+ * Default idle handler and the initial value of pm_idle.  The idle loop
+ * in cpu_idle() reaches it through pm_idle with IRQs disabled and expects
+ * IRQs to be enabled again on return.
+ */
+static void default_idle(void)
+{
+	/* Expected to do the clock switching and wait-for-interrupt tricks. */
+	cpu_do_idle();
+	/* Hand control back with IRQs on. */
+	local_irq_enable();
+}
+
+void (*pm_idle)(void) = default_idle;	/* idle hook run by cpu_idle() */
+EXPORT_SYMBOL_GPL(pm_idle);
+
+/*
+ * The per-CPU idle loop; it never returns.  The way pm_idle is called
+ * mirrors x86 so that things like cpuidle get invoked in the same way:
+ * IRQs are disabled before the final need_resched() check, and pm_idle
+ * is expected to return with them enabled again.
+ */
+void cpu_idle(void)
+{
+	local_fiq_enable();
+
+	/* endless idle loop with no priority at all */
+	for (;;) {
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
+		while (!need_resched()) {
+			/*
+			 * Disable interrupts before re-checking so that a
+			 * wakeup call cannot be missed.
+			 */
+			local_irq_disable();
+			if (need_resched()) {
+				local_irq_enable();
+				continue;
+			}
+
+			stop_critical_timings();
+			pm_idle();
+			start_critical_timings();
+
+			/* pm_idle functions must return with IRQs enabled. */
+			WARN_ON(irqs_disabled());
+		}
+
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
+		schedule_preempt_disabled();
+	}
+}
+
+void machine_shutdown(void)
+{
+#ifdef CONFIG_SMP
+	smp_send_stop();	/* SMP builds only; otherwise the body is empty */
+#endif
+}
+
+void machine_halt(void)
+{
+	machine_shutdown();	/* smp_send_stop() when CONFIG_SMP is set */
+	while (1);		/* halted: spin forever */
+}
+
+void machine_power_off(void)
+{
+	machine_shutdown();
+	if (pm_power_off)	/* optional hook; plain return when unset */
+		pm_power_off();
+}
+
+/* Restart through the optional pm_restart hook; never returns. */
+void machine_restart(char *cmd)
+{
+	machine_shutdown();
+
+	/* Disable interrupts first */
+	local_irq_disable();
+	local_fiq_disable();
+
+	/* Call the machine-specific restart hook, if one is installed. */
+	if (pm_restart)
+		pm_restart(cmd);
+
+	/* Still here: the hook is unset or it returned. */
+	printk("Reboot failed -- System halted\n");
+	/* Nothing more can be done, so spin forever. */
+	while (1);
+}
+
+/* Print a CPU/kernel banner followed by the register state in @regs. */
+void __show_regs(struct pt_regs *regs)
+{
+	int n;
+
+	printk("CPU: %d    %s  (%s %.*s)\n",
+		raw_smp_processor_id(), print_tainted(), init_utsname()->release,
+		(int)strcspn(init_utsname()->version, " "), init_utsname()->version);
+	print_symbol("PC is at %s\n", instruction_pointer(regs));
+	print_symbol("LR is at %s\n", regs->regs[30]);
+	printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n",
+	       regs->pc, regs->regs[30], regs->pstate);
+	printk("sp : %016llx\n", regs->sp);
+	/* x29 down to x0; a newline follows every even-numbered register */
+	for (n = 29; n >= 0; n--) {
+		printk("x%-2d: %016llx ", n, regs->regs[n]);
+		if (!(n & 1))
+			printk("\n");
+	}
+	printk("\n");
+}
+
+void show_regs(struct pt_regs * regs)
+{
+	printk("\n");	/* blank line ahead of the dump */
+	printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm);
+	__show_regs(regs);	/* CPU banner plus register contents */
+}
+
+/*
+ * Hook for freeing the current thread's data structures; empty here.
+ */
+void exit_thread(void)
+{
+}
+
+void flush_thread(void)
+{
+	fpsimd_flush_thread();			/* FP/SIMD part of the flush */
+	flush_ptrace_hw_breakpoint(current);	/* ptrace hw breakpoints */
+}
+
+void release_thread(struct task_struct *dead_task)
+{	/* empty body: nothing is done with @dead_task */
+}
+
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+	fpsimd_save_state(&current->thread.fpsimd_state);
+	*dst = *src;	/* NOTE(review): assumes src == current */
+	return 0;	/* no failure path */
+}
+
+asmlinkage void ret_from_fork(void) asm("ret_from_fork"); /* new task pc */
+
+/* Set up the register frame, kernel context and TLS of the new task @p. */
+int copy_thread(unsigned long clone_flags, unsigned long stack_start,
+		unsigned long stk_sz, struct task_struct *p,
+		struct pt_regs *regs)
+{
+	struct cpu_context *ctx = &p->thread.cpu_context;
+	struct pt_regs *child = task_pt_regs(p);
+	unsigned long tls;
+
+	*child = *regs;
+	child->regs[0] = 0;	/* register 0 reads as zero in the new task */
+
+	if (!is_compat_thread(task_thread_info(p))) {
+		/* The saved value may be stale, so read tpidr_el0 itself. */
+		asm("mrs %0, tpidr_el0" : "=r" (tls));
+		child->sp = stack_start;
+	} else {
+		tls = p->thread.tp_value;
+		child->compat_sp = stack_start;
+	}
+
+	memset(ctx, 0, sizeof(struct cpu_context));
+	ctx->sp = (unsigned long)child;
+	ctx->pc = (unsigned long)ret_from_fork;	/* saved pc for the new task */
+
+	/* A TLS pointer passed to clone overrides the inherited one. */
+	if (clone_flags & CLONE_SETTLS)
+		tls = regs->regs[3];
+	p->thread.tp_value = tls;
+
+	ptrace_hw_copy_thread(p);
+
+	return 0;
+}
+
+/*
+ * Save tpidr_el0 for a native outgoing task, then load @next's TLS state.
+ */
+static void tls_thread_switch(struct task_struct *next)
+{
+	unsigned long user_rw, user_ro = 0;
+
+	if (!is_compat_task()) {
+		asm("mrs %0, tpidr_el0" : "=r" (user_rw));
+		current->thread.tp_value = user_rw;
+	}
+
+	user_rw = next->thread.tp_value;
+	if (is_compat_thread(task_thread_info(next))) {
+		user_ro = user_rw;	/* compat: value goes in tpidrro_el0 */
+		user_rw = 0;		/* and tpidr_el0 is written as zero */
+	}
+
+	asm("	msr	tpidr_el0, %0\n"
+	    "	msr	tpidrro_el0, %1"
+	    : : "r" (user_rw), "r" (user_ro));
+}
+
+/*
+ * Thread switching: move the FP/SIMD, TLS and hardware-breakpoint state
+ * over to @next, then switch the CPU context from @prev to @next.
+ *
+ * Returns whatever cpu_switch_to() hands back.
+ */
+struct task_struct *__switch_to(struct task_struct *prev,
+				struct task_struct *next)
+{
+	/* Per-thread state that has to follow the task. */
+	fpsimd_thread_switch(next);
+	tls_thread_switch(next);
+	hw_breakpoint_thread_switch(next);
+
+	/* the actual thread switch */
+	return cpu_switch_to(prev, next);
+}
+
+/*
+ * Fill in @elfregs from @t's saved pt_regs for a core dump; always returns 1.
+ */
+int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs)
+{
+	elf_core_copy_regs(elfregs, task_pt_regs(t));
+	return 1;
+}
+
+/*
+ * Core-dump hook for FP state: fills in nothing and always returns 0.
+ */
+int dump_fpu (struct pt_regs *regs, struct user_fp *fp)
+{
+	return 0;
+}
+EXPORT_SYMBOL(dump_fpu);
+
+/*
+ * Entry stub for kernel threads.  x1 holds the thread argument, x2 the
+ * thread function and x3 the exit function: x1 is moved to x0, x3 to x30
+ * (the link register, so fn returns into exit), then we branch to x2.
+ */
+extern void kernel_thread_helper(void);
+asm(	".section .text\n"
+"	.align\n"
+"	.type	kernel_thread_helper, #function\n"
+"kernel_thread_helper:\n"
+"	mov	x0, x1\n"
+"	mov	x30, x3\n"
+"	br	x2\n"
+"	.size	kernel_thread_helper, . - kernel_thread_helper\n"
+"	.previous");
+
+#define kernel_thread_exit	do_exit	/* fn returns into this via x30 */
+
+/*
+ * Create a kernel thread running fn(arg): the child starts with pstate
+ * PSR_MODE_EL1h in kernel_thread_helper, with arg, fn and exit in x1-x3.
+ */
+pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+	struct pt_regs regs;
+
+	memset(&regs, 0, sizeof(regs));
+	regs.pstate = PSR_MODE_EL1h;
+	regs.pc = (unsigned long)kernel_thread_helper;
+	regs.regs[1] = (unsigned long)arg;
+	regs.regs[2] = (unsigned long)fn;
+	regs.regs[3] = (unsigned long)kernel_thread_exit;
+	flags |= CLONE_VM | CLONE_UNTRACED;
+	return do_fork(flags, 0, &regs, 0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
+
+/* First pc outside the scheduler in @p's saved call chain, or 0 if none. */
+unsigned long get_wchan(struct task_struct *p)
+{
+	struct stackframe frame;
+	int depth;
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+
+	frame.fp = thread_saved_fp(p);
+	frame.sp = thread_saved_sp(p);
+	frame.pc = thread_saved_pc(p);
+	for (depth = 0; depth <= 16; depth++) {	/* up to 17 unwinds */
+		if (unwind_frame(&frame) < 0)
+			return 0;
+		if (!in_sched_functions(frame.pc))
+			return frame.pc;
+	}
+	return 0;
+}
+
+unsigned long arch_align_stack(unsigned long sp)
+{
+	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+		sp -= get_random_int() & ~PAGE_MASK;	/* random offset */
+	return sp & ~0xf;	/* clear the low four bits */
+}
+
+static unsigned long randomize_base(unsigned long base)
+{
+	unsigned long range_end = base + (STACK_RND_MASK << PAGE_SHIFT) + 1;
+	return randomize_range(base, range_end, 0) ? : base; /* 0 => keep base */
+}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+	return randomize_base(mm->brk);	/* base is the current mm->brk */
+}
+
+unsigned long randomize_et_dyn(unsigned long base)
+{
+	return randomize_base(base);	/* shared with arch_randomize_brk() */
+}