[PATCH] sched: mc/smt power savings sched policy

sysfs entries 'sched_mc_power_savings' and 'sched_smt_power_savings' in
/sys/devices/system/cpu/ control the MC/SMT power savings policy for the
scheduler.

Based on the values (1-enable, 0-disable) for these controls, sched groups
cpu power will be determined for different domains.  When power savings
policy is enabled and under light load conditions, scheduler will minimize
the physical packages/cpu cores carrying the load and thus conserving
power(with a perf impact based on the workload characteristics...  see OLS
2005 CMP kernel scheduler paper for more details..)

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Con Kolivas <kernel@kolivas.org>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h
index aa4185e..6adbd9b 100644
--- a/include/asm-i386/topology.h
+++ b/include/asm-i386/topology.h
@@ -112,4 +112,9 @@
 
 extern cpumask_t cpu_coregroup_map(int cpu);
 
+#ifdef CONFIG_SMP
+#define mc_capable()	(boot_cpu_data.x86_max_cores > 1)
+#define smt_capable()	(smp_num_siblings > 1)
+#endif
+
 #endif /* _ASM_I386_TOPOLOGY_H */
diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h
index 616b5ed..937c212 100644
--- a/include/asm-ia64/topology.h
+++ b/include/asm-ia64/topology.h
@@ -112,6 +112,7 @@
 #define topology_core_id(cpu)			(cpu_data(cpu)->core_id)
 #define topology_core_siblings(cpu)		(cpu_core_map[cpu])
 #define topology_thread_siblings(cpu)		(cpu_sibling_map[cpu])
+#define smt_capable() 				(smp_num_siblings > 1)
 #endif
 
 #include <asm-generic/topology.h>
diff --git a/include/asm-powerpc/topology.h b/include/asm-powerpc/topology.h
index 92f3e55..bbc3844 100644
--- a/include/asm-powerpc/topology.h
+++ b/include/asm-powerpc/topology.h
@@ -93,5 +93,10 @@
 
 #endif /* CONFIG_NUMA */
 
+#ifdef CONFIG_SMP
+#include <asm/cputable.h>
+#define smt_capable() 		(cpu_has_feature(CPU_FTR_SMT))
+#endif
+
 #endif /* __KERNEL__ */
 #endif	/* _ASM_POWERPC_TOPOLOGY_H */
diff --git a/include/asm-sparc64/topology.h b/include/asm-sparc64/topology.h
index 0e234e2..98a6c61 100644
--- a/include/asm-sparc64/topology.h
+++ b/include/asm-sparc64/topology.h
@@ -1,6 +1,9 @@
 #ifndef _ASM_SPARC64_TOPOLOGY_H
 #define _ASM_SPARC64_TOPOLOGY_H
 
+#include <asm/spitfire.h>
+#define smt_capable()	(tlb_type == hypervisor)
+
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_SPARC64_TOPOLOGY_H */
diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h
index c4e46e7..6e7a2e9 100644
--- a/include/asm-x86_64/topology.h
+++ b/include/asm-x86_64/topology.h
@@ -59,6 +59,8 @@
 #define topology_core_id(cpu)			(cpu_data[cpu].cpu_core_id)
 #define topology_core_siblings(cpu)		(cpu_core_map[cpu])
 #define topology_thread_siblings(cpu)		(cpu_sibling_map[cpu])
+#define mc_capable()			(boot_cpu_data.x86_max_cores > 1)
+#define smt_capable() 			(smp_num_siblings > 1)
 #endif
 
 #include <asm-generic/topology.h>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ab8ffc5..0bc81a1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -570,6 +570,11 @@
 #define SD_WAKE_AFFINE		32	/* Wake task to waking CPU */
 #define SD_WAKE_BALANCE		64	/* Perform balancing at task wakeup */
 #define SD_SHARE_CPUPOWER	128	/* Domain members share cpu power */
+#define SD_POWERSAVINGS_BALANCE	256	/* Balance for power savings */
+
+#define BALANCE_FOR_POWER	((sched_mc_power_savings || sched_smt_power_savings) \
+				 ? SD_POWERSAVINGS_BALANCE : 0)
+
 
 struct sched_group {
 	struct sched_group *next;	/* Must be a circular list */
@@ -1412,6 +1417,11 @@
 extern long sched_setaffinity(pid_t pid, cpumask_t new_mask);
 extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
 
+#include <linux/sysdev.h>
+extern int sched_mc_power_savings, sched_smt_power_savings;
+extern struct sysdev_attribute attr_sched_mc_power_savings, attr_sched_smt_power_savings;
+extern int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls);
+
 extern void normalize_rt_tasks(void);
 
 #ifdef CONFIG_PM
diff --git a/include/linux/topology.h b/include/linux/topology.h
index a305ae2..ec1eca8 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -134,7 +134,8 @@
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
-				| SD_WAKE_AFFINE,	\
+				| SD_WAKE_AFFINE	\
+				| BALANCE_FOR_POWER,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 	.nr_balance_failed	= 0,			\