/*
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */

#ifdef CONFIG_SMP

/*
 * The "RT overload" flag: it gets set if a CPU has more than
 * one runnable RT task.
 */
static cpumask_t rt_overload_mask;
static atomic_t rto_count;

static inline int rt_overloaded(void)
{
	return atomic_read(&rto_count);
}

static inline cpumask_t *rt_overload(void)
{
	return &rt_overload_mask;
}

static inline void rt_set_overload(struct rq *rq)
{
	rq->rt.overloaded = 1;
	cpu_set(rq->cpu, rt_overload_mask);
	/*
	 * Make sure the mask is visible before we set
	 * the overload count. That is checked to determine
	 * if we should look at the mask. It would be a shame
	 * if we looked at the mask, but the mask was not
	 * updated yet.
	 */
	wmb();
	atomic_inc(&rto_count);
}

static inline void rt_clear_overload(struct rq *rq)
{
	/* the order here really doesn't matter */
	atomic_dec(&rto_count);
	cpu_clear(rq->cpu, rt_overload_mask);
	rq->rt.overloaded = 0;
}

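/*
 * Keep the "RT overload" state in sync with this runqueue: a CPU is
 * considered overloaded when it has more than one runnable RT task and
 * at least one of them is allowed to migrate elsewhere.
 */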
static void update_rt_migration(struct rq *rq)
{
	if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1))
		rt_set_overload(rq);
	else
		rt_clear_overload(rq);
}
#endif /* CONFIG_SMP */

/*
 * Update the current task's runtime statistics. Skip current tasks that
 * are not in our scheduling class.
 */
static void update_curr_rt(struct rq *rq)
{
	struct task_struct *curr = rq->curr;
	u64 delta_exec;

	if (!task_has_rt_policy(curr))
		return;

	delta_exec = rq->clock - curr->se.exec_start;
	if (unlikely((s64)delta_exec < 0))
		delta_exec = 0;

	schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));

	curr->se.sum_exec_runtime += delta_exec;
	curr->se.exec_start = rq->clock;
	cpuacct_charge(curr, delta_exec);
}

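/*
 * Bookkeeping for RT tasks entering/leaving the runqueue: track the
 * number of runnable RT tasks, the highest RT priority queued and the
 * number of tasks that may migrate, then refresh the overload state.
 */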
static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq)
{
	WARN_ON(!rt_task(p));
	rq->rt.rt_nr_running++;
#ifdef CONFIG_SMP
	if (p->prio < rq->rt.highest_prio)
		rq->rt.highest_prio = p->prio;
	if (p->nr_cpus_allowed > 1)
		rq->rt.rt_nr_migratory++;

	update_rt_migration(rq);
#endif /* CONFIG_SMP */
}

static inline void dec_rt_tasks(struct task_struct *p, struct rq *rq)
{
	WARN_ON(!rt_task(p));
	WARN_ON(!rq->rt.rt_nr_running);
	rq->rt.rt_nr_running--;
#ifdef CONFIG_SMP
	if (rq->rt.rt_nr_running) {
		struct rt_prio_array *array;

		WARN_ON(p->prio < rq->rt.highest_prio);
		if (p->prio == rq->rt.highest_prio) {
			/* recalculate */
			array = &rq->rt.active;
			rq->rt.highest_prio =
				sched_find_first_bit(array->bitmap);
		} /* otherwise leave rq->rt.highest_prio alone */
	} else
		rq->rt.highest_prio = MAX_RT_PRIO;
	if (p->nr_cpus_allowed > 1)
		rq->rt.rt_nr_migratory--;

	update_rt_migration(rq);
#endif /* CONFIG_SMP */
}

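/*
 * Add a task to the tail of its priority queue and mark that priority
 * level as occupied in the bitmap.
 */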
static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
{
	struct rt_prio_array *array = &rq->rt.active;

	list_add_tail(&p->run_list, array->queue + p->prio);
	__set_bit(p->prio, array->bitmap);
	inc_cpu_load(rq, p->se.load.weight);

	inc_rt_tasks(p, rq);
}

/*
 * Adding/removing a task to/from a priority array:
 */
static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
{
	struct rt_prio_array *array = &rq->rt.active;

	update_curr_rt(rq);

	list_del(&p->run_list);
	if (list_empty(array->queue + p->prio))
		__clear_bit(p->prio, array->bitmap);
	dec_cpu_load(rq, p->se.load.weight);

	dec_rt_tasks(p, rq);
}

/*
 * Put task to the end of the run list without the overhead of dequeue
 * followed by enqueue.
 */
static void requeue_task_rt(struct rq *rq, struct task_struct *p)
{
	struct rt_prio_array *array = &rq->rt.active;

	list_move_tail(&p->run_list, array->queue + p->prio);
}

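/*
 * sched_yield() from an RT task simply moves it to the tail of its
 * priority queue, letting other tasks of the same priority run first.
 */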
static void
yield_task_rt(struct rq *rq)
{
	requeue_task_rt(rq, rq->curr);
}

#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);

static int select_task_rq_rt(struct task_struct *p, int sync)
{
	struct rq *rq = task_rq(p);

	/*
	 * If the current task is an RT task, then
	 * try to see if we can wake this RT task up on another
	 * runqueue. Otherwise simply start this RT task
	 * on its current runqueue.
	 *
	 * We want to avoid overloading runqueues, even if
	 * the waking RT task is of higher priority than the current
	 * RT task. RT tasks behave differently than other tasks: if
	 * one gets preempted, we try to push it off to another queue.
	 * So trying to keep a preempting RT task on the same
	 * cache hot CPU will force the running RT task to
	 * a cold CPU. So we waste all the cache for the lower
	 * RT task in hopes of saving some of an RT task
	 * that is just being woken and probably will have
	 * cold cache anyway.
	 */
	if (unlikely(rt_task(rq->curr)) &&
	    (p->nr_cpus_allowed > 1)) {
		int cpu = find_lowest_rq(p);

		return (cpu == -1) ? task_cpu(p) : cpu;
	}

	/*
	 * Otherwise, just let it ride on the affined RQ and the
	 * post-schedule router will push the preempted task away
	 */
	return task_cpu(p);
}
#endif /* CONFIG_SMP */

/*
 * Preempt the current task with a newly woken task if needed:
 */
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
{
	if (p->prio < rq->curr->prio)
		resched_task(rq->curr);
}

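/*
 * Pick the first task on the highest-priority non-empty queue, or
 * return NULL if no RT task is runnable.
 */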
static struct task_struct *pick_next_task_rt(struct rq *rq)
{
	struct rt_prio_array *array = &rq->rt.active;
	struct task_struct *next;
	struct list_head *queue;
	int idx;

	idx = sched_find_first_bit(array->bitmap);
	if (idx >= MAX_RT_PRIO)
		return NULL;

	queue = array->queue + idx;
	next = list_entry(queue->next, struct task_struct, run_list);

	next->se.exec_start = rq->clock;

	return next;
}

static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
	update_curr_rt(rq);
	p->se.exec_start = 0;
}

#ifdef CONFIG_SMP
/* Only try algorithms three times */
#define RT_MAX_TRIES 3

static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);

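/*
 * A task is a candidate for pushing/pulling if it is not currently
 * running, is allowed to run on the target CPU (or no CPU was given),
 * and is not pinned to a single CPU.
 */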
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
	if (!task_running(rq, p) &&
	    (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) &&
	    (p->nr_cpus_allowed > 1))
		return 1;
	return 0;
}

/* Return the second highest RT task, NULL otherwise */
static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
{
	struct rt_prio_array *array = &rq->rt.active;
	struct task_struct *next;
	struct list_head *queue;
	int idx;

	assert_spin_locked(&rq->lock);

	if (likely(rq->rt.rt_nr_running < 2))
		return NULL;

	idx = sched_find_first_bit(array->bitmap);
	if (unlikely(idx >= MAX_RT_PRIO)) {
		WARN_ON(1); /* rt_nr_running is bad */
		return NULL;
	}

	queue = array->queue + idx;
	BUG_ON(list_empty(queue));

	next = list_entry(queue->next, struct task_struct, run_list);
	if (unlikely(pick_rt_task(rq, next, cpu)))
		goto out;

	if (queue->next->next != queue) {
		/* same prio task */
		next = list_entry(queue->next->next, struct task_struct,
				  run_list);
		if (pick_rt_task(rq, next, cpu))
			goto out;
	}

 retry:
	/* slower, but more flexible */
	idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
	if (unlikely(idx >= MAX_RT_PRIO))
		return NULL;

	queue = array->queue + idx;
	BUG_ON(list_empty(queue));

	list_for_each_entry(next, queue, run_list) {
		if (pick_rt_task(rq, next, cpu))
			goto out;
	}

	goto retry;

 out:
	return next;
}

static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);

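/*
 * Build a mask of CPUs whose runqueues only run RT tasks of lower
 * priority than @task (a CPU with no RT tasks at all wins outright).
 * Returns the number of candidate CPUs found, 0 if there are none.
 */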
static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
{
	int lowest_prio = -1;
	int lowest_cpu = -1;
	int count = 0;
	int cpu;

	cpus_and(*lowest_mask, cpu_online_map, task->cpus_allowed);

	/*
	 * Scan each rq for the lowest prio.
	 */
	for_each_cpu_mask(cpu, *lowest_mask) {
		struct rq *rq = cpu_rq(cpu);

		/* We look for lowest RT prio or non-rt CPU */
		if (rq->rt.highest_prio >= MAX_RT_PRIO) {
			/*
			 * if we already found a low RT queue
			 * and now we found this non-rt queue
			 * clear the mask and set our bit.
			 * Otherwise just return the queue as is
			 * and the count==1 will cause the algorithm
			 * to use the first bit found.
			 */
			if (lowest_cpu != -1) {
				cpus_clear(*lowest_mask);
				cpu_set(rq->cpu, *lowest_mask);
			}
			return 1;
		}

		/* no locking for now */
		if ((rq->rt.highest_prio > task->prio)
		    && (rq->rt.highest_prio >= lowest_prio)) {
			if (rq->rt.highest_prio > lowest_prio) {
				/* new low - clear old data */
				lowest_prio = rq->rt.highest_prio;
				lowest_cpu = cpu;
				count = 0;
			}
			count++;
		} else
			cpu_clear(cpu, *lowest_mask);
	}

	/*
	 * Clear out all the set bits that represent
	 * runqueues that were of higher prio than
	 * the lowest_prio.
	 */
	if (lowest_cpu > 0) {
		/*
		 * Perhaps we could add another cpumask op to
		 * zero out bits. Like cpu_zero_bits(cpumask, nrbits);
		 * Then that could be optimized to use memset and such.
		 */
		for_each_cpu_mask(cpu, *lowest_mask) {
			if (cpu >= lowest_cpu)
				break;
			cpu_clear(cpu, *lowest_mask);
		}
	}

	return count;
}

static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
{
	int first;

	/* "this_cpu" is cheaper to preempt than a remote processor */
	if ((this_cpu != -1) && cpu_isset(this_cpu, *mask))
		return this_cpu;

	first = first_cpu(*mask);
	if (first != NR_CPUS)
		return first;

	return -1;
}

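/*
 * Select the best CPU for pushing @task: prefer its last CPU if it is
 * among the lowest-priority candidates, then search the sched-domain
 * hierarchy for a topologically close candidate, and finally fall back
 * to any candidate.  Returns -1 if no candidate exists.
 */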
static int find_lowest_rq(struct task_struct *task)
{
	struct sched_domain *sd;
	cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
	int this_cpu = smp_processor_id();
	int cpu = task_cpu(task);
	int count = find_lowest_cpus(task, lowest_mask);

	if (!count)
		return -1; /* No targets found */

	/*
	 * There is no sense in performing an optimal search if only one
	 * target is found.
	 */
	if (count == 1)
		return first_cpu(*lowest_mask);

	/*
	 * At this point we have built a mask of cpus representing the
	 * lowest priority tasks in the system.  Now we want to elect
	 * the best one based on our affinity and topology.
	 *
	 * We prioritize the last cpu that the task executed on since
	 * it is most likely cache-hot in that location.
	 */
	if (cpu_isset(cpu, *lowest_mask))
		return cpu;

	/*
	 * Otherwise, we consult the sched_domains span maps to figure
	 * out which cpu is logically closest to our hot cache data.
	 */
	if (this_cpu == cpu)
		this_cpu = -1; /* Skip this_cpu opt if the same */

	for_each_domain(cpu, sd) {
		if (sd->flags & SD_WAKE_AFFINE) {
			cpumask_t domain_mask;
			int best_cpu;

			cpus_and(domain_mask, sd->span, *lowest_mask);

			best_cpu = pick_optimal_cpu(this_cpu,
						    &domain_mask);
			if (best_cpu != -1)
				return best_cpu;
		}
	}

	/*
	 * And finally, if there were no matches within the domains
	 * just give the caller *something* to work with from the compatible
	 * locations.
	 */
	return pick_optimal_cpu(this_cpu, lowest_mask);
}

/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
	struct rq *lowest_rq = NULL;
	int tries;
	int cpu;

	for (tries = 0; tries < RT_MAX_TRIES; tries++) {
		cpu = find_lowest_rq(task);

		if ((cpu == -1) || (cpu == rq->cpu))
			break;

		lowest_rq = cpu_rq(cpu);

		/* if the prio of this runqueue changed, try again */
		if (double_lock_balance(rq, lowest_rq)) {
			/*
			 * We had to unlock the run queue. In
			 * the mean time, task could have
			 * migrated already or had its affinity changed.
			 * Also make sure that it wasn't scheduled on its rq.
			 */
			if (unlikely(task_rq(task) != rq ||
				     !cpu_isset(lowest_rq->cpu,
						task->cpus_allowed) ||
				     task_running(rq, task) ||
				     !task->se.on_rq)) {

				spin_unlock(&lowest_rq->lock);
				lowest_rq = NULL;
				break;
			}
		}

		/* If this rq is still suitable use it. */
		if (lowest_rq->rt.highest_prio > task->prio)
			break;

		/* try again */
		spin_unlock(&lowest_rq->lock);
		lowest_rq = NULL;
	}

	return lowest_rq;
}

/*
 * If the current CPU has more than one RT task, see if the non-running
 * task can migrate over to a CPU that is running a task
 * of lesser priority.
 */
static int push_rt_task(struct rq *rq)
{
	struct task_struct *next_task;
	struct rq *lowest_rq;
	int ret = 0;
	int paranoid = RT_MAX_TRIES;

	assert_spin_locked(&rq->lock);

	if (!rq->rt.overloaded)
		return 0;

	next_task = pick_next_highest_task_rt(rq, -1);
	if (!next_task)
		return 0;

 retry:
	if (unlikely(next_task == rq->curr)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * It's possible that the next_task slipped in of
	 * higher priority than current. If that's the case
	 * just reschedule current.
	 */
	if (unlikely(next_task->prio < rq->curr->prio)) {
		resched_task(rq->curr);
		return 0;
	}

	/* We might release rq lock */
	get_task_struct(next_task);

	/* find_lock_lowest_rq locks the rq if found */
	lowest_rq = find_lock_lowest_rq(next_task, rq);
	if (!lowest_rq) {
		struct task_struct *task;
		/*
		 * find_lock_lowest_rq releases rq->lock
		 * so it is possible that next_task has changed.
		 * If it has, then try again.
		 */
		task = pick_next_highest_task_rt(rq, -1);
		if (unlikely(task != next_task) && task && paranoid--) {
			put_task_struct(next_task);
			next_task = task;
			goto retry;
		}
		goto out;
	}

	assert_spin_locked(&lowest_rq->lock);

	deactivate_task(rq, next_task, 0);
	set_task_cpu(next_task, lowest_rq->cpu);
	activate_task(lowest_rq, next_task, 0);

	resched_task(lowest_rq->curr);

	spin_unlock(&lowest_rq->lock);

	ret = 1;
out:
	put_task_struct(next_task);

	return ret;
}

/*
 * TODO: Currently we just use the second highest prio task on
 *       the queue, and stop when it can't migrate (or there are
 *       no more RT tasks).  There may be a case where a lower
 *       priority RT task has a different affinity than the
 *       higher RT task. In this case the lower RT task could
 *       possibly be able to migrate whereas the higher priority
 *       RT task could not.  We currently ignore this issue.
 *       Enhancements are welcome!
 */
static void push_rt_tasks(struct rq *rq)
{
	/* push_rt_task will return true if it moved an RT */
	while (push_rt_task(rq))
		;
}

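/*
 * Pull higher-priority RT tasks from overloaded runqueues onto this
 * runqueue.  Returns 1 if this runqueue's runnable set may have
 * changed and the caller should re-pick its next task.
 */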
static int pull_rt_task(struct rq *this_rq)
{
	struct task_struct *next;
	struct task_struct *p;
	struct rq *src_rq;
	cpumask_t *rto_cpumask;
	int this_cpu = this_rq->cpu;
	int cpu;
	int ret = 0;

	assert_spin_locked(&this_rq->lock);

	/*
	 * If cpusets are used, and we have overlapping
	 * run queue cpusets, then this algorithm may not catch all.
	 * This is just the price you pay on trying to keep
	 * dirtying caches down on large SMP machines.
	 */
	if (likely(!rt_overloaded()))
		return 0;

	next = pick_next_task_rt(this_rq);

	rto_cpumask = rt_overload();

	for_each_cpu_mask(cpu, *rto_cpumask) {
		if (this_cpu == cpu)
			continue;

		src_rq = cpu_rq(cpu);
		if (unlikely(src_rq->rt.rt_nr_running <= 1)) {
			/*
			 * It is possible that overlapping cpusets
			 * will miss clearing a non overloaded runqueue.
			 * Clear it now.
			 */
			if (double_lock_balance(this_rq, src_rq)) {
				/* unlocked our runqueue lock */
				struct task_struct *old_next = next;
				next = pick_next_task_rt(this_rq);
				if (next != old_next)
					ret = 1;
			}
			if (likely(src_rq->rt.rt_nr_running <= 1))
				/*
				 * Small chance that this_rq->curr changed
				 * but it's really harmless here.
				 */
				rt_clear_overload(this_rq);
			else
				/*
				 * Heh, the src_rq is now overloaded, since
				 * we already have the src_rq lock, go straight
				 * to pulling tasks from it.
				 */
				goto try_pulling;
			spin_unlock(&src_rq->lock);
			continue;
		}

		/*
		 * We can potentially drop this_rq's lock in
		 * double_lock_balance, and another CPU could
		 * steal our next task - hence we must cause
		 * the caller to recalculate the next task
		 * in that case:
		 */
		if (double_lock_balance(this_rq, src_rq)) {
			struct task_struct *old_next = next;
			next = pick_next_task_rt(this_rq);
			if (next != old_next)
				ret = 1;
		}

		/*
		 * Are there still pullable RT tasks?
		 */
		if (src_rq->rt.rt_nr_running <= 1) {
			spin_unlock(&src_rq->lock);
			continue;
		}

 try_pulling:
		p = pick_next_highest_task_rt(src_rq, this_cpu);

		/*
		 * Do we have an RT task that preempts
		 * the to-be-scheduled task?
		 */
		if (p && (!next || (p->prio < next->prio))) {
			WARN_ON(p == src_rq->curr);
			WARN_ON(!p->se.on_rq);

			/*
			 * There's a chance that p is higher in priority
			 * than what's currently running on its cpu.
			 * This is just that p is waking up and hasn't
			 * had a chance to schedule. We only pull
			 * p if it is lower in priority than the
			 * current task on the run queue or
			 * this_rq's next task is lower in prio than
			 * the current task on that rq.
			 */
			if (p->prio < src_rq->curr->prio ||
			    (next && next->prio < src_rq->curr->prio))
				goto bail;

			ret = 1;

			deactivate_task(src_rq, p, 0);
			set_task_cpu(p, this_cpu);
			activate_task(this_rq, p, 0);
			/*
			 * We continue with the search, just in
			 * case there's an even higher prio task
			 * in another runqueue. (low likelihood
			 * but possible)
			 */

			/*
			 * Update next so that we won't pick a task
			 * on another cpu with a priority lower (or equal)
			 * than the one we just picked.
			 */
			next = p;

		}
 bail:
		spin_unlock(&src_rq->lock);
	}

	return ret;
}

static void schedule_balance_rt(struct rq *rq,
				struct task_struct *prev)
{
	/* Try to pull RT tasks here if we lower this rq's prio */
	if (unlikely(rt_task(prev)) &&
	    rq->rt.highest_prio > prev->prio)
		pull_rt_task(rq);
}

static void schedule_tail_balance_rt(struct rq *rq)
{
	/*
	 * If we have more than one rt_task queued, then
	 * see if we can push the other rt_tasks off to other CPUS.
	 * Note we may release the rq lock, and since
	 * the lock was owned by prev, we need to release it
	 * first via finish_lock_switch and then reacquire it here.
	 */
	if (unlikely(rq->rt.overloaded)) {
		spin_lock_irq(&rq->lock);
		push_rt_tasks(rq);
		spin_unlock_irq(&rq->lock);
	}
}

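/*
 * After an RT task wakes up here without beating the highest-priority
 * RT task already queued, try to push queued RT tasks away while this
 * runqueue is overloaded.
 */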
static void wakeup_balance_rt(struct rq *rq, struct task_struct *p)
{
	if (unlikely(rt_task(p)) &&
	    !task_running(rq, p) &&
	    (p->prio >= rq->rt.highest_prio) &&
	    rq->rt.overloaded)
		push_rt_tasks(rq);
}

static unsigned long
load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
		unsigned long max_load_move,
		struct sched_domain *sd, enum cpu_idle_type idle,
		int *all_pinned, int *this_best_prio)
{
	/* don't touch RT tasks */
	return 0;
}

static int
move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
		 struct sched_domain *sd, enum cpu_idle_type idle)
{
	/* don't touch RT tasks */
	return 0;
}

static void set_cpus_allowed_rt(struct task_struct *p, cpumask_t *new_mask)
{
	int weight = cpus_weight(*new_mask);

	BUG_ON(!rt_task(p));

	/*
	 * Update the migration status of the RQ if we have an RT task
	 * which is running AND changing its weight value.
	 */
	if (p->se.on_rq && (weight != p->nr_cpus_allowed)) {
		struct rq *rq = task_rq(p);

		if ((p->nr_cpus_allowed <= 1) && (weight > 1)) {
			rq->rt.rt_nr_migratory++;
		} else if ((p->nr_cpus_allowed > 1) && (weight <= 1)) {
			BUG_ON(!rq->rt.rt_nr_migratory);
			rq->rt.rt_nr_migratory--;
		}

		update_rt_migration(rq);
	}

	p->cpus_allowed = *new_mask;
	p->nr_cpus_allowed = weight;
}

#else /* CONFIG_SMP */
# define schedule_tail_balance_rt(rq)	do { } while (0)
# define schedule_balance_rt(rq, prev)	do { } while (0)
# define wakeup_balance_rt(rq, p)	do { } while (0)
#endif /* CONFIG_SMP */

static void task_tick_rt(struct rq *rq, struct task_struct *p)
{
	update_curr_rt(rq);

	/*
	 * RR tasks need a special form of timeslice management.
	 * FIFO tasks have no timeslices.
	 */
	if (p->policy != SCHED_RR)
		return;

	if (--p->time_slice)
		return;

	p->time_slice = DEF_TIMESLICE;

	/*
	 * Requeue to the end of queue if we are not the only element
	 * on the queue:
	 */
	if (p->run_list.prev != p->run_list.next) {
		requeue_task_rt(rq, p);
		set_tsk_need_resched(p);
	}
}

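/*
 * A running task has just switched into the RT class (for example via
 * sched_setscheduler()); restart its exec-time accounting window.
 */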
static void set_curr_task_rt(struct rq *rq)
{
	struct task_struct *p = rq->curr;

	p->se.exec_start = rq->clock;
}

const struct sched_class rt_sched_class = {
	.next			= &fair_sched_class,
	.enqueue_task		= enqueue_task_rt,
	.dequeue_task		= dequeue_task_rt,
	.yield_task		= yield_task_rt,
#ifdef CONFIG_SMP
	.select_task_rq		= select_task_rq_rt,
#endif /* CONFIG_SMP */

	.check_preempt_curr	= check_preempt_curr_rt,

	.pick_next_task		= pick_next_task_rt,
	.put_prev_task		= put_prev_task_rt,

#ifdef CONFIG_SMP
	.load_balance		= load_balance_rt,
	.move_one_task		= move_one_task_rt,
	.set_cpus_allowed	= set_cpus_allowed_rt,
#endif

	.set_curr_task		= set_curr_task_rt,
	.task_tick		= task_tick_rt,
};