tracing: add "power-tracer": C/P state tracer to help power optimization

Impact: new "power-tracer" ftrace plugin

This patch adds a C/P-state ftrace plugin that will generate
detailed statistics about the C/P-states that are being used,
so that we can look at detailed decisions that the C/P-state
code is making, rather than the too high level "average"
that we have today.

An example way of using this is:

 mount -t debugfs none /sys/kernel/debug
 echo cstate > /sys/kernel/debug/tracing/current_tracer
 echo 1 > /sys/kernel/debug/tracing/tracing_enabled
 sleep 1
 echo 0 > /sys/kernel/debug/tracing/tracing_enabled
 cat /sys/kernel/debug/tracing/trace | perl scripts/trace/cstate.pl > out.svg

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8e48c5d..88ea02d 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
 #include <linux/cpufreq.h>
 #include <linux/compiler.h>
 #include <linux/dmi.h>
+#include <linux/ftrace.h>
 
 #include <linux/acpi.h>
 #include <acpi/processor.h>
@@ -391,6 +392,7 @@
 	unsigned int next_perf_state = 0; /* Index into perf table */
 	unsigned int i;
 	int result = 0;
+	struct power_trace it;
 
 	dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
 
@@ -427,6 +429,8 @@
 		}
 	}
 
+	trace_power_mark(&it, POWER_PSTATE, next_perf_state);
+
 	switch (data->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
 		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c622772..c27af49 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -7,6 +7,7 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 #include <linux/clockchips.h>
+#include <linux/ftrace.h>
 #include <asm/system.h>
 
 unsigned long idle_halt;
@@ -100,6 +101,9 @@
 void default_idle(void)
 {
 	if (hlt_use_halt()) {
+		struct power_trace it;
+
+		trace_power_start(&it, POWER_CSTATE, 1);
 		current_thread_info()->status &= ~TS_POLLING;
 		/*
 		 * TS_POLLING-cleared state must be visible before we
@@ -112,6 +116,7 @@
 		else
 			local_irq_enable();
 		current_thread_info()->status |= TS_POLLING;
+		trace_power_end(&it);
 	} else {
 		local_irq_enable();
 		/* loop is done by the caller */
@@ -154,24 +159,31 @@
  */
 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
+	struct power_trace it;
+
+	trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
 	if (!need_resched()) {
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
 		if (!need_resched())
 			__mwait(ax, cx);
 	}
+	trace_power_end(&it);
 }
 
 /* Default MONITOR/MWAIT with no hints, used for default C1 state */
 static void mwait_idle(void)
 {
+	struct power_trace it;
 	if (!need_resched()) {
+		trace_power_start(&it, POWER_CSTATE, 1);
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
 		if (!need_resched())
 			__sti_mwait(0, 0);
 		else
 			local_irq_enable();
+		trace_power_end(&it);
 	} else
 		local_irq_enable();
 }
@@ -183,9 +195,13 @@
  */
 static void poll_idle(void)
 {
+	struct power_trace it;
+
+	trace_power_start(&it, POWER_CSTATE, 0);
 	local_irq_enable();
 	while (!need_resched())
 		cpu_relax();
+	trace_power_end(&it);
 }
 
 /*