Merge branch 'master' into sh/hw-breakpoints

Conflict between FPU thread flag migration and debug
thread flag addition.

Conflicts:
	arch/sh/include/asm/thread_info.h
	arch/sh/include/asm/ubc.h
	arch/sh/kernel/process_32.c
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 4a2c866..7399d78 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -133,7 +133,10 @@
 	regs.regs[5] = (unsigned long)fn;
 
 	regs.pc = (unsigned long)kernel_thread_helper;
-	regs.sr = (1 << 30);
+	regs.sr = SR_MD;
+#if defined(CONFIG_SH_FPU)
+	regs.sr |= SR_FD;
+#endif
 
 	/* Ok, create the new process.. */
 	pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
@@ -141,6 +144,7 @@
 
 	return pid;
 }
+EXPORT_SYMBOL(kernel_thread);
 
 /*
  * Free current thread data structures etc..
@@ -184,6 +188,16 @@
 
 	return fpvalid;
 }
+EXPORT_SYMBOL(dump_fpu);
+
+/*
+ * This gets called before we allocate a new thread and copy
+ * the current task into it.
+ */
+void prepare_to_copy(struct task_struct *tsk)
+{
+	unlazy_fpu(tsk, task_pt_regs(tsk));
+}
 
 asmlinkage void ret_from_fork(void);
 
@@ -193,15 +207,10 @@
 {
 	struct thread_info *ti = task_thread_info(p);
 	struct pt_regs *childregs;
-	struct task_struct *tsk = current;
-
-#if defined(CONFIG_SH_FPU)
-	unlazy_fpu(tsk, regs);
-	p->thread.fpu = tsk->thread.fpu;
-	copy_to_stopped_child_used_math(p);
-#endif
 
 #if defined(CONFIG_SH_DSP)
+	struct task_struct *tsk = current;
+
 	if (is_dsp_enabled(tsk)) {
 		/* We can use the __save_dsp or just copy the struct:
 		 * __save_dsp(p);
@@ -220,6 +229,8 @@
 	} else {
 		childregs->regs[15] = (unsigned long)childregs;
 		ti->addr_limit = KERNEL_DS;
+		ti->status &= ~TS_USEDFPU;
+		p->fpu_counter = 0;
 	}
 
 	if (clone_flags & CLONE_SETTLS)
@@ -242,9 +253,13 @@
 __notrace_funcgraph struct task_struct *
 __switch_to(struct task_struct *prev, struct task_struct *next)
 {
-#if defined(CONFIG_SH_FPU)
+	struct thread_struct *next_t = &next->thread;
+
 	unlazy_fpu(prev, task_pt_regs(prev));
-#endif
+
+	/* we're going to use this soon, after a few expensive things */
+	if (next->fpu_counter > 5)
+		prefetch(&next_t->fpu.hard);
 
 #ifdef CONFIG_MMU
 	/*
@@ -256,6 +271,14 @@
 		     : "r" (task_thread_info(next)));
 #endif
 
+	/*
+	 * If the task has used fpu the last 5 timeslices, just do a full
+	 * restore of the math state immediately to avoid the trap; the
+	 * chances of needing FPU soon are obviously high now
+	 */
+	if (next->fpu_counter > 5)
+		fpu_state_restore(task_pt_regs(next));
+
 	return prev;
 }