sched: Separate the scheduler entry for preemption

Block-IO and workqueues call into notifier functions from the
scheduler core code with interrupts and preemption disabled. These
calls should be made before entering the scheduler core.

To simplify this, separate the scheduler core code into
__schedule(). __schedule() is directly called from the places which
set PREEMPT_ACTIVE and from schedule(). This allows us to add the work
checks into schedule(), so they are only called when a task voluntary
goes to sleep.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: stable@kernel.org # 2.6.39+
Link: http://lkml.kernel.org/r/20110622174918.813258321@linutronix.de
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index ccacdbd..ec15e812 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4279,9 +4279,9 @@
 }
 
 /*
- * schedule() is the main scheduler function.
+ * __schedule() is the main scheduler function.
  */
-asmlinkage void __sched schedule(void)
+static void __sched __schedule(void)
 {
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
@@ -4369,6 +4369,11 @@
 	if (need_resched())
 		goto need_resched;
 }
+
+asmlinkage void schedule(void)
+{
+	__schedule();
+}
 EXPORT_SYMBOL(schedule);
 
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
@@ -4435,7 +4440,7 @@
 
 	do {
 		add_preempt_count_notrace(PREEMPT_ACTIVE);
-		schedule();
+		__schedule();
 		sub_preempt_count_notrace(PREEMPT_ACTIVE);
 
 		/*
@@ -4463,7 +4468,7 @@
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
 		local_irq_enable();
-		schedule();
+		__schedule();
 		local_irq_disable();
 		sub_preempt_count(PREEMPT_ACTIVE);
 
@@ -5588,7 +5593,7 @@
 static void __cond_resched(void)
 {
 	add_preempt_count(PREEMPT_ACTIVE);
-	schedule();
+	__schedule();
 	sub_preempt_count(PREEMPT_ACTIVE);
 }