arch/tile: use extended assembly to inline __mb_incoherent()

This avoids having to maintain an additional separate assembly
file, and of course the inline is slightly more efficient as well.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
diff --git a/arch/tile/include/asm/system.h b/arch/tile/include/asm/system.h
index 5388850..23d1842 100644
--- a/arch/tile/include/asm/system.h
+++ b/arch/tile/include/asm/system.h
@@ -90,7 +90,24 @@
 #endif
 
 #if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
-int __mb_incoherent(void);  /* Helper routine for mb_incoherent(). */
+#include <hv/syscall_public.h>
+/*
+ * Issue an uncacheable load to each memory controller, then
+ * wait until those loads have completed.
+ */
+static inline void __mb_incoherent(void)
+{
+	long clobber_r10;
+	asm volatile("swint2"
+		     : "=R10" (clobber_r10)
+		     : "R10" (HV_SYS_fence_incoherent)
+		     : "r0", "r1", "r2", "r3", "r4",
+		       "r5", "r6", "r7", "r8", "r9",
+		       "r11", "r12", "r13", "r14",
+		       "r15", "r16", "r17", "r18", "r19",
+		       "r20", "r21", "r22", "r23", "r24",
+		       "r25", "r26", "r27", "r28", "r29");
+}
 #endif
 
 /* Fence to guarantee visibility of stores to incoherent memory. */