blob: fdd151cef9b11432c2b68d4eb5344054391e609d [file] [log] [blame]
Suresh Siddha61c46282008-03-10 15:28:04 -07001#include <linux/errno.h>
2#include <linux/kernel.h>
3#include <linux/mm.h>
4#include <linux/smp.h>
Jeremy Fitzhardinge389d1fb2009-02-27 13:25:28 -08005#include <linux/prctl.h>
Suresh Siddha61c46282008-03-10 15:28:04 -07006#include <linux/slab.h>
7#include <linux/sched.h>
Peter Zijlstra7f424a82008-04-25 17:39:01 +02008#include <linux/module.h>
9#include <linux/pm.h>
Thomas Gleixneraa276e12008-06-09 19:15:00 +020010#include <linux/clockchips.h>
Amerigo Wang9d62dcd2009-05-11 22:05:28 -040011#include <linux/random.h>
Avi Kivity7c68af62009-09-19 09:40:22 +030012#include <linux/user-return-notifier.h>
Andy Isaacson814e2c82009-12-08 00:29:42 -080013#include <linux/dmi.h>
14#include <linux/utsname.h>
Richard Weinberger90e24012012-03-25 23:00:04 +020015#include <linux/stackprotector.h>
16#include <linux/tick.h>
17#include <linux/cpuidle.h>
Arjan van de Ven61613522009-09-17 16:11:28 +020018#include <trace/events/power.h>
Frederic Weisbecker24f1e32c2009-09-09 19:22:48 +020019#include <linux/hw_breakpoint.h>
Borislav Petkov93789b32011-01-20 15:42:52 +010020#include <asm/cpu.h>
Ivan Vecerad3ec5ca2008-11-11 14:33:44 +010021#include <asm/apic.h>
Jaswinder Singh Rajput2c1b2842009-04-11 00:03:10 +053022#include <asm/syscalls.h>
Jeremy Fitzhardinge389d1fb2009-02-27 13:25:28 -080023#include <asm/idle.h>
24#include <asm/uaccess.h>
25#include <asm/i387.h>
Linus Torvalds1361b832012-02-21 13:19:22 -080026#include <asm/fpu-internal.h>
K.Prasad66cb5912009-06-01 23:44:55 +053027#include <asm/debugreg.h>
Richard Weinberger90e24012012-03-25 23:00:04 +020028#include <asm/nmi.h>
29
#ifdef CONFIG_X86_64
/* Per-CPU flag: set to 1 by enter_idle(), test-and-cleared by __exit_idle(). */
static DEFINE_PER_CPU(unsigned char, is_idle);
#endif

/* Slab cache for per-task extended state; created in arch_task_cache_init(). */
struct kmem_cache *task_xstate_cachep;
EXPORT_SYMBOL_GPL(task_xstate_cachep);
Suresh Siddha61c46282008-03-10 15:28:04 -070036
37int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
38{
Avi Kivity86603282010-05-06 11:45:46 +030039 int ret;
40
Suresh Siddha61c46282008-03-10 15:28:04 -070041 *dst = *src;
Avi Kivity86603282010-05-06 11:45:46 +030042 if (fpu_allocated(&src->thread.fpu)) {
43 memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
44 ret = fpu_alloc(&dst->thread.fpu);
45 if (ret)
46 return ret;
47 fpu_copy(&dst->thread.fpu, &src->thread.fpu);
Suresh Siddhaaa283f42008-03-10 15:28:05 -070048 }
Suresh Siddha61c46282008-03-10 15:28:04 -070049 return 0;
50}
51
Suresh Siddhaaa283f42008-03-10 15:28:05 -070052void free_thread_xstate(struct task_struct *tsk)
53{
Avi Kivity86603282010-05-06 11:45:46 +030054 fpu_free(&tsk->thread.fpu);
Suresh Siddhaaa283f42008-03-10 15:28:05 -070055}
56
Suresh Siddha61c46282008-03-10 15:28:04 -070057void free_thread_info(struct thread_info *ti)
58{
Suresh Siddhaaa283f42008-03-10 15:28:05 -070059 free_thread_xstate(ti->task);
Zhao Jinc812d8f2011-08-20 21:24:57 +080060 free_pages((unsigned long)ti, THREAD_ORDER);
Suresh Siddha61c46282008-03-10 15:28:04 -070061}
62
63void arch_task_cache_init(void)
64{
65 task_xstate_cachep =
66 kmem_cache_create("task_xstate", xstate_size,
67 __alignof__(union thread_xstate),
Vegard Nossum2dff4402008-05-31 15:56:17 +020068 SLAB_PANIC | SLAB_NOTRACK, NULL);
Suresh Siddha61c46282008-03-10 15:28:04 -070069}
Peter Zijlstra7f424a82008-04-25 17:39:01 +020070
Thomas Gleixner00dba562008-06-09 18:35:28 +020071/*
Jeremy Fitzhardinge389d1fb2009-02-27 13:25:28 -080072 * Free current thread data structures etc..
73 */
74void exit_thread(void)
75{
76 struct task_struct *me = current;
77 struct thread_struct *t = &me->thread;
Thomas Gleixner250981e2009-03-16 13:07:21 +010078 unsigned long *bp = t->io_bitmap_ptr;
Jeremy Fitzhardinge389d1fb2009-02-27 13:25:28 -080079
Thomas Gleixner250981e2009-03-16 13:07:21 +010080 if (bp) {
Jeremy Fitzhardinge389d1fb2009-02-27 13:25:28 -080081 struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
82
Jeremy Fitzhardinge389d1fb2009-02-27 13:25:28 -080083 t->io_bitmap_ptr = NULL;
84 clear_thread_flag(TIF_IO_BITMAP);
85 /*
86 * Careful, clear this in the TSS too:
87 */
88 memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
89 t->io_bitmap_max = 0;
90 put_cpu();
Thomas Gleixner250981e2009-03-16 13:07:21 +010091 kfree(bp);
Jeremy Fitzhardinge389d1fb2009-02-27 13:25:28 -080092 }
Jeremy Fitzhardinge389d1fb2009-02-27 13:25:28 -080093}
94
Brian Gerst3bef4442010-01-13 10:45:55 -050095void show_regs(struct pt_regs *regs)
96{
97 show_registers(regs);
Namhyung Kime8e999c2011-03-18 11:40:06 +090098 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 0);
Brian Gerst3bef4442010-01-13 10:45:55 -050099}
100
Andy Isaacson814e2c82009-12-08 00:29:42 -0800101void show_regs_common(void)
102{
Naga Chumbalkar84e383b2011-02-14 22:47:17 +0000103 const char *vendor, *product, *board;
Andy Isaacson814e2c82009-12-08 00:29:42 -0800104
Naga Chumbalkar84e383b2011-02-14 22:47:17 +0000105 vendor = dmi_get_system_info(DMI_SYS_VENDOR);
106 if (!vendor)
107 vendor = "";
Andy Isaacsona1884b82009-12-08 00:30:21 -0800108 product = dmi_get_system_info(DMI_PRODUCT_NAME);
109 if (!product)
110 product = "";
Andy Isaacson814e2c82009-12-08 00:29:42 -0800111
Naga Chumbalkar84e383b2011-02-14 22:47:17 +0000112 /* Board Name is optional */
113 board = dmi_get_system_info(DMI_BOARD_NAME);
114
Pekka Enbergd015a092009-12-28 10:26:59 +0200115 printk(KERN_CONT "\n");
Naga Chumbalkar84e383b2011-02-14 22:47:17 +0000116 printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s",
Andy Isaacson814e2c82009-12-08 00:29:42 -0800117 current->pid, current->comm, print_tainted(),
118 init_utsname()->release,
119 (int)strcspn(init_utsname()->version, " "),
Naga Chumbalkar84e383b2011-02-14 22:47:17 +0000120 init_utsname()->version);
Jan Beulichfd8fa4d32011-02-17 15:56:58 +0000121 printk(KERN_CONT " %s %s", vendor, product);
122 if (board)
123 printk(KERN_CONT "/%s", board);
Naga Chumbalkar84e383b2011-02-14 22:47:17 +0000124 printk(KERN_CONT "\n");
Andy Isaacson814e2c82009-12-08 00:29:42 -0800125}
126
Jeremy Fitzhardinge389d1fb2009-02-27 13:25:28 -0800127void flush_thread(void)
128{
129 struct task_struct *tsk = current;
130
Frederic Weisbecker24f1e32c2009-09-09 19:22:48 +0200131 flush_ptrace_hw_breakpoint(tsk);
Jeremy Fitzhardinge389d1fb2009-02-27 13:25:28 -0800132 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
133 /*
134 * Forget coprocessor state..
135 */
136 tsk->fpu_counter = 0;
137 clear_fpu(tsk);
138 clear_used_math();
139}
140
141static void hard_disable_TSC(void)
142{
143 write_cr4(read_cr4() | X86_CR4_TSD);
144}
145
146void disable_TSC(void)
147{
148 preempt_disable();
149 if (!test_and_set_thread_flag(TIF_NOTSC))
150 /*
151 * Must flip the CPU state synchronously with
152 * TIF_NOTSC in the current running context.
153 */
154 hard_disable_TSC();
155 preempt_enable();
156}
157
158static void hard_enable_TSC(void)
159{
160 write_cr4(read_cr4() & ~X86_CR4_TSD);
161}
162
163static void enable_TSC(void)
164{
165 preempt_disable();
166 if (test_and_clear_thread_flag(TIF_NOTSC))
167 /*
168 * Must flip the CPU state synchronously with
169 * TIF_NOTSC in the current running context.
170 */
171 hard_enable_TSC();
172 preempt_enable();
173}
174
175int get_tsc_mode(unsigned long adr)
176{
177 unsigned int val;
178
179 if (test_thread_flag(TIF_NOTSC))
180 val = PR_TSC_SIGSEGV;
181 else
182 val = PR_TSC_ENABLE;
183
184 return put_user(val, (unsigned int __user *)adr);
185}
186
187int set_tsc_mode(unsigned int val)
188{
189 if (val == PR_TSC_SIGSEGV)
190 disable_TSC();
191 else if (val == PR_TSC_ENABLE)
192 enable_TSC();
193 else
194 return -EINVAL;
195
196 return 0;
197}
198
199void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
200 struct tss_struct *tss)
201{
202 struct thread_struct *prev, *next;
203
204 prev = &prev_p->thread;
205 next = &next_p->thread;
206
Peter Zijlstraea8e61b2010-03-25 14:51:51 +0100207 if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
208 test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
209 unsigned long debugctl = get_debugctlmsr();
210
211 debugctl &= ~DEBUGCTLMSR_BTF;
212 if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
213 debugctl |= DEBUGCTLMSR_BTF;
214
215 update_debugctlmsr(debugctl);
216 }
Jeremy Fitzhardinge389d1fb2009-02-27 13:25:28 -0800217
Jeremy Fitzhardinge389d1fb2009-02-27 13:25:28 -0800218 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
219 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
220 /* prev and next are different */
221 if (test_tsk_thread_flag(next_p, TIF_NOTSC))
222 hard_disable_TSC();
223 else
224 hard_enable_TSC();
225 }
226
227 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
228 /*
229 * Copy the relevant range of the IO bitmap.
230 * Normally this is 128 bytes or less:
231 */
232 memcpy(tss->io_bitmap, next->io_bitmap_ptr,
233 max(prev->io_bitmap_max, next->io_bitmap_max));
234 } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
235 /*
236 * Clear any possible leftover bits:
237 */
238 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
239 }
Avi Kivity7c68af62009-09-19 09:40:22 +0300240 propagate_user_return_notify(prev_p, next_p);
Jeremy Fitzhardinge389d1fb2009-02-27 13:25:28 -0800241}
242
243int sys_fork(struct pt_regs *regs)
244{
245 return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
246}
247
248/*
249 * This is trivial, and on the face of it looks like it
250 * could equally well be done in user mode.
251 *
252 * Not so, for quite unobvious reasons - register pressure.
253 * In user mode vfork() cannot have a stack frame, and if
254 * done by calling the "clone()" system call directly, you
255 * do not have enough call-clobbered registers to hold all
256 * the information you need.
257 */
258int sys_vfork(struct pt_regs *regs)
259{
260 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
261 NULL, NULL);
262}
263
Brian Gerstf839bbc2009-12-09 19:01:56 -0500264long
265sys_clone(unsigned long clone_flags, unsigned long newsp,
266 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
267{
268 if (!newsp)
269 newsp = regs->sp;
270 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
271}
272
/*
 * This gets run with %si containing the
 * function to call, and %di containing
 * the "args".
 */
extern void kernel_thread_helper(void);
279
280/*
281 * Create a kernel thread
282 */
283int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
284{
285 struct pt_regs regs;
286
287 memset(&regs, 0, sizeof(regs));
288
289 regs.si = (unsigned long) fn;
290 regs.di = (unsigned long) arg;
291
292#ifdef CONFIG_X86_32
293 regs.ds = __USER_DS;
294 regs.es = __USER_DS;
295 regs.fs = __KERNEL_PERCPU;
296 regs.gs = __KERNEL_STACK_CANARY;
Cyrill Gorcunov864a0922010-01-13 10:16:07 +0000297#else
298 regs.ss = __KERNEL_DS;
Brian Gerstdf59e7b2009-12-09 12:34:44 -0500299#endif
300
301 regs.orig_ax = -1;
302 regs.ip = (unsigned long) kernel_thread_helper;
303 regs.cs = __KERNEL_CS | get_kernel_rpl();
Seiichi Ikarashi1cf83432011-12-06 17:58:14 +0900304 regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
Brian Gerstdf59e7b2009-12-09 12:34:44 -0500305
306 /* Ok, create the new process.. */
307 return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
308}
309EXPORT_SYMBOL(kernel_thread);
Jeremy Fitzhardinge389d1fb2009-02-27 13:25:28 -0800310
311/*
Brian Gerst11cf88b2009-12-09 19:01:53 -0500312 * sys_execve() executes a new program.
313 */
David Howellsd7627462010-08-17 23:52:56 +0100314long sys_execve(const char __user *name,
315 const char __user *const __user *argv,
316 const char __user *const __user *envp, struct pt_regs *regs)
Brian Gerst11cf88b2009-12-09 19:01:53 -0500317{
318 long error;
319 char *filename;
320
321 filename = getname(name);
322 error = PTR_ERR(filename);
323 if (IS_ERR(filename))
324 return error;
325 error = do_execve(filename, argv, envp, regs);
326
327#ifdef CONFIG_X86_32
328 if (error == 0) {
329 /* Make sure we don't return using sysenter.. */
330 set_thread_flag(TIF_IRET);
331 }
332#endif
333
334 putname(filename);
335 return error;
336}
Thomas Gleixner09fd4b42008-06-09 18:04:27 +0200337
338/*
Thomas Gleixnere9623b32008-05-16 22:55:26 +0200339 * Idle related variables and functions
340 */
Thomas Renningerd1896042010-11-03 17:06:14 +0100341unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200342EXPORT_SYMBOL(boot_option_idle_override);
343
344/*
345 * Powermanagement idle function, if any..
346 */
347void (*pm_idle)(void);
Andy Whitcroft60b8b1d2011-06-14 12:45:10 -0700348#ifdef CONFIG_APM_MODULE
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200349EXPORT_SYMBOL(pm_idle);
Len Brown06ae40c2011-04-01 15:28:09 -0400350#endif
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200351
/* Tell default_idle() whether to halt when idle; always returns 1 here. */
static inline int hlt_use_halt(void)
{
	return 1;
}
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200356
#ifndef CONFIG_SMP
/* Without CONFIG_SMP this must never be reached, so it is a BUG(). */
static inline void play_dead(void)
{
	BUG();
}
#endif
363
#ifdef CONFIG_X86_64
/* Mark this CPU as idle and run the idle notifier chain with IDLE_START. */
void enter_idle(void)
{
	percpu_write(is_idle, 1);
	idle_notifier_call_chain(IDLE_START);
}

/*
 * If this CPU is marked idle, clear the mark and run the idle notifier
 * chain with IDLE_END; do nothing when the mark was already clear.
 */
static void __exit_idle(void)
{
	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
		return;
	idle_notifier_call_chain(IDLE_END);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}
#endif
387
388/*
389 * The idle thread. There's no useful work to be
390 * done, so just try to conserve power and have a
391 * low exit latency (ie sit in a loop waiting for
392 * somebody to say that they'd like to reschedule)
393 */
394void cpu_idle(void)
395{
396 /*
397 * If we're the non-boot CPU, nothing set the stack canary up
398 * for us. CPU0 already has it initialized but no harm in
399 * doing it again. This is a good place for updating it, as
400 * we wont ever return from this function (so the invalid
401 * canaries already on the stack wont ever trigger).
402 */
403 boot_init_stack_canary();
404 current_thread_info()->status |= TS_POLLING;
405
406 while (1) {
407 tick_nohz_idle_enter();
408
409 while (!need_resched()) {
410 rmb();
411
412 if (cpu_is_offline(smp_processor_id()))
413 play_dead();
414
415 /*
416 * Idle routines should keep interrupts disabled
417 * from here on, until they go to idle.
418 * Otherwise, idle callbacks can misfire.
419 */
420 local_touch_nmi();
421 local_irq_disable();
422
423 enter_idle();
424
425 /* Don't trace irqs off for idle */
426 stop_critical_timings();
427
428 /* enter_idle() needs rcu for notifiers */
429 rcu_idle_enter();
430
431 if (cpuidle_idle_call())
432 pm_idle();
433
434 rcu_idle_exit();
435 start_critical_timings();
436
437 /* In many cases the interrupt that ended idle
438 has already called exit_idle. But some idle
439 loops can be woken up without interrupt. */
440 __exit_idle();
441 }
442
443 tick_nohz_idle_exit();
444 preempt_enable_no_resched();
445 schedule();
446 preempt_disable();
447 }
448}
449
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200450/*
451 * We use this if we don't have any better
452 * idle routine..
453 */
454void default_idle(void)
455{
456 if (hlt_use_halt()) {
Steven Rostedt48454652012-02-07 09:40:30 -0500457 trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
458 trace_cpu_idle_rcuidle(1, smp_processor_id());
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200459 current_thread_info()->status &= ~TS_POLLING;
460 /*
461 * TS_POLLING-cleared state must be visible before we
462 * test NEED_RESCHED:
463 */
464 smp_mb();
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200465
466 if (!need_resched())
467 safe_halt(); /* enables interrupts racelessly */
468 else
469 local_irq_enable();
470 current_thread_info()->status |= TS_POLLING;
Steven Rostedt48454652012-02-07 09:40:30 -0500471 trace_power_end_rcuidle(smp_processor_id());
472 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200473 } else {
474 local_irq_enable();
475 /* loop is done by the caller */
476 cpu_relax();
Thomas Gleixnere9623b32008-05-16 22:55:26 +0200477 }
478}
Andy Whitcroft60b8b1d2011-06-14 12:45:10 -0700479#ifdef CONFIG_APM_MODULE
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200480EXPORT_SYMBOL(default_idle);
481#endif
482
Konrad Rzeszutek Wilke5fd47b2011-11-21 18:02:02 -0500483bool set_pm_idle_to_default(void)
484{
485 bool ret = !!pm_idle;
486
487 pm_idle = default_idle;
488
489 return ret;
490}
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200491void stop_this_cpu(void *dummy)
492{
493 local_irq_disable();
494 /*
495 * Remove this CPU:
496 */
Rusty Russell4f062892009-03-13 14:49:54 +1030497 set_cpu_online(smp_processor_id(), false);
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200498 disable_local_APIC();
499
500 for (;;) {
501 if (hlt_works(smp_processor_id()))
502 halt();
503 }
504}
505
/* Empty callback for smp_call_function() in cpu_idle_wait(); @unused is ignored. */
static void do_nothing(void *unused)
{
}
509
510/*
511 * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
512 * pm_idle and update to new pm_idle value. Required while changing pm_idle
513 * handler on SMP systems.
514 *
515 * Caller must have changed pm_idle to the new value before the call. Old
516 * pm_idle value will not be used by any CPU after the return of this function.
517 */
518void cpu_idle_wait(void)
519{
520 smp_mb();
521 /* kick all the CPUs so that they exit out of pm_idle */
522 smp_call_function(do_nothing, NULL, 1);
523}
524EXPORT_SYMBOL_GPL(cpu_idle_wait);
525
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200526/* Default MONITOR/MWAIT with no hints, used for default C1 state */
527static void mwait_idle(void)
528{
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200529 if (!need_resched()) {
Steven Rostedt48454652012-02-07 09:40:30 -0500530 trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
531 trace_cpu_idle_rcuidle(1, smp_processor_id());
Christoph Lameter349c0042011-03-12 12:50:10 +0100532 if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200533 clflush((void *)&current_thread_info()->flags);
534
535 __monitor((void *)&current_thread_info()->flags, 0, 0);
536 smp_mb();
537 if (!need_resched())
538 __sti_mwait(0, 0);
539 else
540 local_irq_enable();
Steven Rostedt48454652012-02-07 09:40:30 -0500541 trace_power_end_rcuidle(smp_processor_id());
542 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200543 } else
544 local_irq_enable();
545}
546
547/*
548 * On SMP it's slightly faster (but much more power-consuming!)
549 * to poll the ->work.need_resched flag instead of waiting for the
550 * cross-CPU IPI to arrive. Use this option with caution.
551 */
Thomas Gleixnere9623b32008-05-16 22:55:26 +0200552static void poll_idle(void)
553{
Steven Rostedt48454652012-02-07 09:40:30 -0500554 trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id());
555 trace_cpu_idle_rcuidle(0, smp_processor_id());
Thomas Gleixnere9623b32008-05-16 22:55:26 +0200556 local_irq_enable();
557 while (!need_resched())
558 cpu_relax();
Steven Rostedt48454652012-02-07 09:40:30 -0500559 trace_power_end_rcuidle(smp_processor_id());
560 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200561}
562
/*
 * mwait selection logic:
 *
 * It depends on the CPU. For AMD CPUs that support MWAIT this is
 * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings
 * then depend on a clock divisor and current Pstate of the core. If
 * all cores of a processor are in halt state (C1) the processor can
 * enter the C1E (C1 enhanced) state. If mwait is used this will never
 * happen.
 *
 * idle=mwait overrides this decision and forces the usage of mwait.
 */

/* CPUID leaf queried by mwait_usable(), and the ECX/EDX bits it tests. */
#define MWAIT_INFO			0x05
#define MWAIT_ECX_EXTENDED_INFO		0x01
#define MWAIT_EDX_C1			0xf0
579
Borislav Petkov1c9d16e2011-02-11 18:17:54 +0100580int mwait_usable(const struct cpuinfo_x86 *c)
Thomas Gleixnere9623b32008-05-16 22:55:26 +0200581{
Thomas Gleixner09fd4b42008-06-09 18:04:27 +0200582 u32 eax, ebx, ecx, edx;
583
Thomas Renningerd1896042010-11-03 17:06:14 +0100584 if (boot_option_idle_override == IDLE_FORCE_MWAIT)
Thomas Gleixnere9623b32008-05-16 22:55:26 +0200585 return 1;
586
Thomas Gleixner09fd4b42008-06-09 18:04:27 +0200587 if (c->cpuid_level < MWAIT_INFO)
588 return 0;
589
590 cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx);
591 /* Check, whether EDX has extended info about MWAIT */
592 if (!(ecx & MWAIT_ECX_EXTENDED_INFO))
593 return 1;
594
595 /*
596 * edx enumeratios MONITOR/MWAIT extensions. Check, whether
597 * C1 supports MWAIT
598 */
599 return (edx & MWAIT_EDX_C1);
Thomas Gleixnere9623b32008-05-16 22:55:26 +0200600}
601
Len Brown02c68a02011-04-01 16:59:53 -0400602bool amd_e400_c1e_detected;
603EXPORT_SYMBOL(amd_e400_c1e_detected);
Thomas Gleixneraa276e12008-06-09 19:15:00 +0200604
Len Brown02c68a02011-04-01 16:59:53 -0400605static cpumask_var_t amd_e400_c1e_mask;
Thomas Gleixner4faac972008-09-22 18:54:29 +0200606
Len Brown02c68a02011-04-01 16:59:53 -0400607void amd_e400_remove_cpu(int cpu)
Thomas Gleixner4faac972008-09-22 18:54:29 +0200608{
Len Brown02c68a02011-04-01 16:59:53 -0400609 if (amd_e400_c1e_mask != NULL)
610 cpumask_clear_cpu(cpu, amd_e400_c1e_mask);
Thomas Gleixner4faac972008-09-22 18:54:29 +0200611}
612
Thomas Gleixneraa276e12008-06-09 19:15:00 +0200613/*
Len Brown02c68a02011-04-01 16:59:53 -0400614 * AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt
Thomas Gleixneraa276e12008-06-09 19:15:00 +0200615 * pending message MSR. If we detect C1E, then we handle it the same
616 * way as C3 power states (local apic timer and TSC stop)
617 */
Len Brown02c68a02011-04-01 16:59:53 -0400618static void amd_e400_idle(void)
Thomas Gleixneraa276e12008-06-09 19:15:00 +0200619{
Thomas Gleixneraa276e12008-06-09 19:15:00 +0200620 if (need_resched())
621 return;
622
Len Brown02c68a02011-04-01 16:59:53 -0400623 if (!amd_e400_c1e_detected) {
Thomas Gleixneraa276e12008-06-09 19:15:00 +0200624 u32 lo, hi;
625
626 rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
Michal Schmidte8c534e2010-07-27 18:53:35 +0200627
Thomas Gleixneraa276e12008-06-09 19:15:00 +0200628 if (lo & K8_INTP_C1E_ACTIVE_MASK) {
Len Brown02c68a02011-04-01 16:59:53 -0400629 amd_e400_c1e_detected = true;
Venki Pallipadi40fb1712008-11-17 16:11:37 -0800630 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
Andreas Herrmann09bfeea2008-09-18 21:12:10 +0200631 mark_tsc_unstable("TSC halt in AMD C1E");
632 printk(KERN_INFO "System has AMD C1E enabled\n");
Thomas Gleixneraa276e12008-06-09 19:15:00 +0200633 }
634 }
635
Len Brown02c68a02011-04-01 16:59:53 -0400636 if (amd_e400_c1e_detected) {
Thomas Gleixneraa276e12008-06-09 19:15:00 +0200637 int cpu = smp_processor_id();
638
Len Brown02c68a02011-04-01 16:59:53 -0400639 if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
640 cpumask_set_cpu(cpu, amd_e400_c1e_mask);
Thomas Gleixner0beefa22008-06-17 09:12:03 +0200641 /*
Suresh Siddhaf833bab2009-08-17 14:34:59 -0700642 * Force broadcast so ACPI can not interfere.
Thomas Gleixner0beefa22008-06-17 09:12:03 +0200643 */
Thomas Gleixneraa276e12008-06-09 19:15:00 +0200644 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
645 &cpu);
646 printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
647 cpu);
648 }
649 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
Thomas Gleixner0beefa22008-06-17 09:12:03 +0200650
Thomas Gleixneraa276e12008-06-09 19:15:00 +0200651 default_idle();
Thomas Gleixner0beefa22008-06-17 09:12:03 +0200652
653 /*
654 * The switch back from broadcast mode needs to be
655 * called with interrupts disabled.
656 */
657 local_irq_disable();
658 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
659 local_irq_enable();
Thomas Gleixneraa276e12008-06-09 19:15:00 +0200660 } else
661 default_idle();
662}
663
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200664void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
665{
Ingo Molnar3e5095d2009-01-27 17:07:08 +0100666#ifdef CONFIG_SMP
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200667 if (pm_idle == poll_idle && smp_num_siblings > 1) {
Mike Travisd6dd6922010-03-05 13:10:38 -0600668 printk_once(KERN_WARNING "WARNING: polling idle and HT enabled,"
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200669 " performance may degrade.\n");
670 }
671#endif
Thomas Gleixner6ddd2a22008-06-09 16:59:53 +0200672 if (pm_idle)
673 return;
674
Thomas Gleixnere9623b32008-05-16 22:55:26 +0200675 if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200676 /*
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200677 * One CPU supports mwait => All CPUs supports mwait
678 */
Thomas Gleixner6ddd2a22008-06-09 16:59:53 +0200679 printk(KERN_INFO "using mwait in idle threads.\n");
680 pm_idle = mwait_idle;
Hans Rosenfeld9d8888c2010-07-28 19:09:31 +0200681 } else if (cpu_has_amd_erratum(amd_erratum_400)) {
682 /* E400: APIC timer interrupt does not wake up CPU from C1e */
Len Brown02c68a02011-04-01 16:59:53 -0400683 printk(KERN_INFO "using AMD E400 aware idle routine\n");
684 pm_idle = amd_e400_idle;
Thomas Gleixner6ddd2a22008-06-09 16:59:53 +0200685 } else
686 pm_idle = default_idle;
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200687}
688
Len Brown02c68a02011-04-01 16:59:53 -0400689void __init init_amd_e400_c1e_mask(void)
Rusty Russell30e1e6d2009-03-17 14:50:34 +1030690{
Len Brown02c68a02011-04-01 16:59:53 -0400691 /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
692 if (pm_idle == amd_e400_idle)
693 zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
Rusty Russell30e1e6d2009-03-17 14:50:34 +1030694}
695
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200696static int __init idle_setup(char *str)
697{
Cyrill Gorcunovab6bc3e2008-07-05 15:53:36 +0400698 if (!str)
699 return -EINVAL;
700
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200701 if (!strcmp(str, "poll")) {
702 printk("using polling idle threads.\n");
703 pm_idle = poll_idle;
Thomas Renningerd1896042010-11-03 17:06:14 +0100704 boot_option_idle_override = IDLE_POLL;
705 } else if (!strcmp(str, "mwait")) {
706 boot_option_idle_override = IDLE_FORCE_MWAIT;
Linus Torvaldsaf0d6a02011-06-01 02:07:22 +0900707 WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n");
Thomas Renningerd1896042010-11-03 17:06:14 +0100708 } else if (!strcmp(str, "halt")) {
Zhao Yakuic1e3b372008-06-24 17:58:53 +0800709 /*
710 * When the boot option of idle=halt is added, halt is
711 * forced to be used for CPU idle. In such case CPU C2/C3
712 * won't be used again.
713 * To continue to load the CPU idle driver, don't touch
714 * the boot_option_idle_override.
715 */
716 pm_idle = default_idle;
Thomas Renningerd1896042010-11-03 17:06:14 +0100717 boot_option_idle_override = IDLE_HALT;
Zhao Yakuida5e09a2008-06-24 18:01:09 +0800718 } else if (!strcmp(str, "nomwait")) {
719 /*
720 * If the boot option of "idle=nomwait" is added,
721 * it means that mwait will be disabled for CPU C2/C3
722 * states. In such case it won't touch the variable
723 * of boot_option_idle_override.
724 */
Thomas Renningerd1896042010-11-03 17:06:14 +0100725 boot_option_idle_override = IDLE_NOMWAIT;
Zhao Yakuic1e3b372008-06-24 17:58:53 +0800726 } else
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200727 return -1;
728
Peter Zijlstra7f424a82008-04-25 17:39:01 +0200729 return 0;
730}
731early_param("idle", idle_setup);
732
Amerigo Wang9d62dcd2009-05-11 22:05:28 -0400733unsigned long arch_align_stack(unsigned long sp)
734{
735 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
736 sp -= get_random_int() % 8192;
737 return sp & ~0xf;
738}
739
740unsigned long arch_randomize_brk(struct mm_struct *mm)
741{
742 unsigned long range_end = mm->brk + 0x02000000;
743 return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
744}
745