[PATCH] x86_64: Add idle notifiers

This adds a new notifier chain that is called with IDLE_START
when a CPU goes idle and IDLE_END when it goes out of idle.
The context can be idle thread or interrupt context.

Since we cannot rely on MONITOR/MWAIT existing the idle
end check currently has to be done in all interrupt
handlers.

They were originally inspired by the similar s390 implementation.

They have a variety of applications:
- They will be needed for CONFIG_NO_IDLE_HZ
- They can be used for oprofile to fix up the missing time
in idle when performance counters don't tick.
- They can be used for better C state management in ACPI
- They could be used for microstate accounting.

This is just infrastructure so far, no users.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index c9df991..669cf0e 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -36,6 +36,7 @@
 #include <linux/utsname.h>
 #include <linux/random.h>
 #include <linux/kprobes.h>
+#include <linux/notifier.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -50,6 +51,7 @@
 #include <asm/desc.h>
 #include <asm/proto.h>
 #include <asm/ia32.h>
+#include <asm/idle.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -64,6 +66,50 @@
 void (*pm_idle)(void);
 static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 
+static struct notifier_block *idle_notifier;
+static DEFINE_SPINLOCK(idle_notifier_lock);
+
+void idle_notifier_register(struct notifier_block *n)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&idle_notifier_lock, flags);
+	notifier_chain_register(&idle_notifier, n);
+	spin_unlock_irqrestore(&idle_notifier_lock, flags);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_register);
+
+void idle_notifier_unregister(struct notifier_block *n)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&idle_notifier_lock, flags);
+	notifier_chain_unregister(&idle_notifier, n);
+	spin_unlock_irqrestore(&idle_notifier_lock, flags);
+}
+EXPORT_SYMBOL(idle_notifier_unregister);
+
+enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
+static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
+
+void enter_idle(void)
+{
+	__get_cpu_var(idle_state) = CPU_IDLE;
+	notifier_call_chain(&idle_notifier, IDLE_START, NULL);
+}
+
+static void __exit_idle(void)
+{
+	__get_cpu_var(idle_state) = CPU_NOT_IDLE;
+	notifier_call_chain(&idle_notifier, IDLE_END, NULL);
+}
+
+/* Called from interrupts to signify idle end */
+void exit_idle(void)
+{
+	if (current->pid | read_pda(irqcount))
+		return;
+	__exit_idle();
+}
+
 /*
  * We use this if we don't have any better
  * idle routine..
@@ -180,7 +226,9 @@
 				idle = default_idle;
 			if (cpu_is_offline(smp_processor_id()))
 				play_dead();
+			enter_idle();
 			idle();
+			__exit_idle();
 		}
 
 		preempt_enable_no_resched();