Revert "rcu: Decrease memory-barrier usage based on semi-formal proof"

This reverts commit e59fb3120becfb36b22ddb8bd27d065d3cdca499.

This reversion was due to (extreme) boot-time slowdowns on SPARC seen by
Yinghai Lu and on x86 by Ingo.

This is a non-trivial reversion due to intervening commits.

Conflicts:

	Documentation/RCU/trace.txt
	kernel/rcutree.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index ed339702..3f6559a 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1520,6 +1520,7 @@
 {
 	int c = 0;
 	int snap;
+	int snap_nmi;
 	int thatcpu;
 
 	/* Check for being in the holdoff period. */
@@ -1530,10 +1531,10 @@
 	for_each_online_cpu(thatcpu) {
 		if (thatcpu == cpu)
 			continue;
-		snap = atomic_add_return(0, &per_cpu(rcu_dynticks,
-						     thatcpu).dynticks);
+		snap = per_cpu(rcu_dynticks, thatcpu).dynticks;
+		snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi;
 		smp_mb(); /* Order sampling of snap with end of grace period. */
-		if ((snap & 0x1) != 0) {
+		if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) {
 			per_cpu(rcu_dyntick_drain, cpu) = 0;
 			per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
 			return rcu_needs_cpu_quick_check(cpu);