MIPS: Unify memcpy.S and memcpy-inatomic.S

We can save the 451 lines of code that comprise memcpy-inatomic.S at the
expense of a single instruction in the memcpy prolog.  We also use an
additional register (t6), so this may cause increased register pressure in
some places as well.  But I think the reduced maintenance burden, of not
having two nearly identical implementations, makes it worth it.

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: linux-mips@linux-mips.org
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index 56a1f85..65192c0 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -183,6 +183,14 @@
 #endif
 
 /*
+ * t6 is used as a flag to note inatomic mode.
+ */
+LEAF(__copy_user_inatomic)
+	b	__copy_user_common
+	 li	t6, 1
+	END(__copy_user_inatomic)
+
+/*
  * A combined memcpy/__copy_user
  * __copy_user sets len to 0 for success; else to an upper bound of
  * the number of uncopied bytes.
@@ -193,6 +201,8 @@
 	move	v0, dst				/* return value */
 .L__memcpy:
 FEXPORT(__copy_user)
+	li	t6, 0	/* not inatomic */
+__copy_user_common:
 	/*
 	 * Note: dst & src may be unaligned, len may be 0
 	 * Temps
@@ -458,6 +468,7 @@
 	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
 	 nop
 	SUB	len, AT, t0		# len number of uncopied bytes
+	bnez	t6, .Ldone	/* Skip the zeroing part if inatomic */
 	/*
 	 * Here's where we rely on src and dst being incremented in tandem,
 	 *   See (3) above.