[SPARC64]: More fully work around Spitfire Errata 51.

It appears that a memory barrier executed soon after a mispredicted
branch, not just one in the branch's delay slot, can trigger the hang
condition described by this CPU erratum.

So move the memory barriers out-of-line, and explicitly put each
one into the delay slot of a "branch always, predict taken"
instruction, which should fully kill this problem.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c
index 2803bc7..425c60c 100644
--- a/arch/sparc64/kernel/pci_iommu.c
+++ b/arch/sparc64/kernel/pci_iommu.c
@@ -466,7 +466,7 @@
 		if (!limit)
 			break;
 		udelay(1);
-		membar("#LoadLoad");
+		rmb();
 	}
 	if (!limit)
 		printk(KERN_WARNING "pci_strbuf_flush: flushflag timeout "
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index 07424b0..6625543 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -103,7 +103,7 @@
 		 * other cpus see our increasing idleness for the buddy
 		 * redistribution algorithm.  -DaveM
 		 */
-		membar("#StoreStore | #StoreLoad");
+		membar_storeload_storestore();
 	}
 }
 
diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c
index 89f5e01..e09ddf9 100644
--- a/arch/sparc64/kernel/sbus.c
+++ b/arch/sparc64/kernel/sbus.c
@@ -147,7 +147,7 @@
 		if (!limit)
 			break;
 		udelay(1);
-		membar("#LoadLoad");
+		rmb();
 	}
 	if (!limit)
 		printk(KERN_WARNING "sbus_strbuf_flush: flushflag timeout "
diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c
index b1ed230..aecccd0 100644
--- a/arch/sparc64/kernel/signal32.c
+++ b/arch/sparc64/kernel/signal32.c
@@ -877,11 +877,12 @@
 			unsigned long page = (unsigned long)
 				page_address(pte_page(*ptep));
 
-			__asm__ __volatile__(
-			"	membar	#StoreStore\n"
-			"	flush	%0 + %1"
-			: : "r" (page), "r" (address & (PAGE_SIZE - 1))
-			: "memory");
+			wmb();
+			__asm__ __volatile__("flush	%0 + %1"
+					     : /* no outputs */
+					     : "r" (page),
+					       "r" (address & (PAGE_SIZE - 1))
+					     : "memory");
 		}
 		pte_unmap(ptep);
 		preempt_enable();
@@ -1292,11 +1293,12 @@
 			unsigned long page = (unsigned long)
 				page_address(pte_page(*ptep));
 
-			__asm__ __volatile__(
-			"	membar	#StoreStore\n"
-			"	flush	%0 + %1"
-			: : "r" (page), "r" (address & (PAGE_SIZE - 1))
-			: "memory");
+			wmb();
+			__asm__ __volatile__("flush	%0 + %1"
+					     : /* no outputs */
+					     : "r" (page),
+					       "r" (address & (PAGE_SIZE - 1))
+					     : "memory");
 		}
 		pte_unmap(ptep);
 		preempt_enable();
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index b9b4249..b4fc6a5 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -144,7 +144,7 @@
 	current->active_mm = &init_mm;
 
 	while (!cpu_isset(cpuid, smp_commenced_mask))
-		membar("#LoadLoad");
+		rmb();
 
 	cpu_set(cpuid, cpu_online_map);
 }
@@ -184,11 +184,11 @@
 	for (i = 0; i < NUM_ITERS; i++) {
 		t0 = tick_ops->get_tick();
 		go[MASTER] = 1;
-		membar("#StoreLoad");
+		membar_storeload();
 		while (!(tm = go[SLAVE]))
-			membar("#LoadLoad");
+			rmb();
 		go[SLAVE] = 0;
-		membar("#StoreStore");
+		wmb();
 		t1 = tick_ops->get_tick();
 
 		if (t1 - t0 < best_t1 - best_t0)
@@ -221,7 +221,7 @@
 	go[MASTER] = 1;
 
 	while (go[MASTER])
-		membar("#LoadLoad");
+		rmb();
 
 	local_irq_save(flags);
 	{
@@ -273,21 +273,21 @@
 
 	/* wait for client to be ready */
 	while (!go[MASTER])
-		membar("#LoadLoad");
+		rmb();
 
 	/* now let the client proceed into his loop */
 	go[MASTER] = 0;
-	membar("#StoreLoad");
+	membar_storeload();
 
 	spin_lock_irqsave(&itc_sync_lock, flags);
 	{
 		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
 			while (!go[MASTER])
-				membar("#LoadLoad");
+				rmb();
 			go[MASTER] = 0;
-			membar("#StoreStore");
+			wmb();
 			go[SLAVE] = tick_ops->get_tick();
-			membar("#StoreLoad");
+			membar_storeload();
 		}
 	}
 	spin_unlock_irqrestore(&itc_sync_lock, flags);
@@ -927,11 +927,11 @@
 		       smp_processor_id());
 #endif
 		penguins_are_doing_time = 1;
-		membar("#StoreStore | #LoadStore");
+		membar_storestore_loadstore();
 		atomic_inc(&smp_capture_registry);
 		smp_cross_call(&xcall_capture, 0, 0, 0);
 		while (atomic_read(&smp_capture_registry) != ncpus)
-			membar("#LoadLoad");
+			rmb();
 #ifdef CAPTURE_DEBUG
 		printk("done\n");
 #endif
@@ -947,7 +947,7 @@
 		       smp_processor_id());
 #endif
 		penguins_are_doing_time = 0;
-		membar("#StoreStore | #StoreLoad");
+		membar_storeload_storestore();
 		atomic_dec(&smp_capture_registry);
 	}
 }
@@ -970,9 +970,9 @@
 	save_alternate_globals(global_save);
 	prom_world(1);
 	atomic_inc(&smp_capture_registry);
-	membar("#StoreLoad | #StoreStore");
+	membar_storeload_storestore();
 	while (penguins_are_doing_time)
-		membar("#LoadLoad");
+		rmb();
 	restore_alternate_globals(global_save);
 	atomic_dec(&smp_capture_registry);
 	prom_world(0);
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 0764b93..a3ea697 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -406,3 +406,12 @@
 EXPORT_SYMBOL(xor_vis_5);
 
 EXPORT_SYMBOL(prom_palette);
+
+/* memory barriers */
+EXPORT_SYMBOL(mb);
+EXPORT_SYMBOL(rmb);
+EXPORT_SYMBOL(wmb);
+EXPORT_SYMBOL(membar_storeload);
+EXPORT_SYMBOL(membar_storeload_storestore);
+EXPORT_SYMBOL(membar_storeload_loadload);
+EXPORT_SYMBOL(membar_storestore_loadstore);