sched/idle: Delay clearing the polling bit

Since the generic idle functions assume !polling, we should clear the
polling bit only at the very last opportunity, in order to avoid
spurious IPIs.

Ideally we'd flip the default to polling, but that means auditing all
arch idle functions.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-vq7719foqzf6z5h4j7eh7f9e@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 8f4390a..ed67f0c 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -78,12 +78,10 @@
 
 	/*
 	 * Check if the idle task must be rescheduled. If it is the
-	 * case, exit the function after re-enabling the local irq and
-	 * set again the polling flag
+	 * case, exit the function after re-enabling the local irq.
 	 */
-	if (current_clr_polling_and_test()) {
+	if (need_resched()) {
 		local_irq_enable();
-		__current_set_polling();
 		return 0;
 	}
 
@@ -127,7 +125,7 @@
 			broadcast = !!(drv->states[next_state].flags &
 				       CPUIDLE_FLAG_TIMER_STOP);
 
-			if (broadcast)
+			if (broadcast) {
 				/*
 				 * Tell the time framework to switch
 				 * to a broadcast timer because our
@@ -139,6 +137,7 @@
 				ret = clockevents_notify(
 					CLOCK_EVT_NOTIFY_BROADCAST_ENTER,
 					&dev->cpu);
+			}
 
 			if (!ret) {
 				trace_cpu_idle_rcuidle(next_state, dev->cpu);
@@ -175,8 +174,12 @@
 	 * We can't use the cpuidle framework, let's use the default
 	 * idle routine
 	 */
-	if (ret)
-		arch_cpu_idle();
+	if (ret) {
+		if (!current_clr_polling_and_test())
+			arch_cpu_idle();
+		else
+			local_irq_enable();
+	}
 
 	__current_set_polling();