sched, nohz: Track nr_busy_cpus in the sched_group_power
Introduce nr_busy_cpus in the struct sched_group_power [Not in sched_group
because sched groups are duplicated for the SD_OVERLAP scheduler domain]
and for each cpu that enters and exits idle, this parameter will
be updated in each scheduler group of the scheduler domain that this cpu
belongs to.
To avoid the frequent update of this state as the cpu enters
and exits idle, the update of the stat during idle exit is
delayed to the first timer tick that happens after the cpu becomes busy.
This is done using NOHZ_IDLE flag in the struct rq's nohz_flags.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20111202010832.555984323@sbsiddha-desk.sc.intel.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 50c06b0..e050563 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4901,6 +4901,36 @@
return;
}
+static inline void set_cpu_sd_state_busy(void)
+{
+ struct sched_domain *sd;
+ int cpu = smp_processor_id();
+
+ if (!test_bit(NOHZ_IDLE, nohz_flags(cpu)))
+ return;
+ clear_bit(NOHZ_IDLE, nohz_flags(cpu));
+
+ rcu_read_lock();
+ for_each_domain(cpu, sd)
+ atomic_inc(&sd->groups->sgp->nr_busy_cpus);
+ rcu_read_unlock();
+}
+
+void set_cpu_sd_state_idle(void)
+{
+ struct sched_domain *sd;
+ int cpu = smp_processor_id();
+
+ if (test_bit(NOHZ_IDLE, nohz_flags(cpu)))
+ return;
+ set_bit(NOHZ_IDLE, nohz_flags(cpu));
+
+ rcu_read_lock();
+ for_each_domain(cpu, sd)
+ atomic_dec(&sd->groups->sgp->nr_busy_cpus);
+ rcu_read_unlock();
+}
+
/*
* This routine will try to nominate the ilb (idle load balancing)
* owner among the cpus whose ticks are stopped. ilb owner will do the idle
@@ -5135,6 +5165,7 @@
* We may be recently in ticked or tickless idle mode. At the first
* busy tick after returning from idle, we will update the busy stats.
*/
+ set_cpu_sd_state_busy();
if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))))
clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));