/*
 *  linux/arch/x86-64/kernel/process.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/kprobes.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/kdebug.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static atomic_t hlt_counter = ATOMIC_INIT(0);

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any.
 */
void (*pm_idle)(void);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

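/*
 * While hlt_counter is non-zero, default_idle() below busy-waits with
 * cpu_relax() instead of executing HLT; disable_hlt()/enable_hlt()
 * bump that counter up and down.
 */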
void disable_hlt(void)
{
        atomic_inc(&hlt_counter);
}

EXPORT_SYMBOL(disable_hlt);

void enable_hlt(void)
{
        atomic_dec(&hlt_counter);
}

EXPORT_SYMBOL(enable_hlt);

/*
 * We use this if we don't have any better
 * idle routine..
 */
void default_idle(void)
{
        local_irq_enable();

        if (!atomic_read(&hlt_counter)) {
                clear_thread_flag(TIF_POLLING_NRFLAG);
                smp_mb__after_clear_bit();
                while (!need_resched()) {
                        local_irq_disable();
                        if (!need_resched())
                                safe_halt();
                        else
                                local_irq_enable();
                }
                set_thread_flag(TIF_POLLING_NRFLAG);
        } else {
                while (!need_resched())
                        cpu_relax();
        }
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle (void)
{
        local_irq_enable();

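        /*
         * Spin until TIF_NEED_RESCHED is set in this thread's flags.
         * "rep; nop" is the PAUSE hint, which keeps the busy-wait loop
         * polite to a sibling hyperthread and saves a little power.
         */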
        asm volatile(
                "2:"
                "testl %0,%1;"
                "rep; nop;"
                "je 2b;"
                : :
                "i" (_TIF_NEED_RESCHED),
                "m" (current_thread_info()->flags));
}

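/*
 * cpu_idle_wait() flags every online CPU's cpu_idle_state and then waits
 * until each CPU's idle loop has noticed and cleared its flag again.
 * Callers typically use this after changing pm_idle, so that no CPU is
 * still running the old idle routine.
 */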
void cpu_idle_wait(void)
{
        unsigned int cpu, this_cpu = get_cpu();
        cpumask_t map;

        set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
        put_cpu();

        cpus_clear(map);
        for_each_online_cpu(cpu) {
                per_cpu(cpu_idle_state, cpu) = 1;
                cpu_set(cpu, map);
        }

        __get_cpu_var(cpu_idle_state) = 0;

        wmb();
        do {
                ssleep(1);
                for_each_online_cpu(cpu) {
                        if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
                                cpu_clear(cpu, map);
                }
                cpus_and(map, map, cpu_online_map);
        } while (!cpus_empty(map));
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We don't actually take CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
        idle_task_exit();
        wbinvd();
        mb();
        /* Ack it */
        __get_cpu_var(cpu_state) = CPU_DEAD;

        while (1)
                safe_halt();
}
#else
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle (void)
{
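        /*
         * TIF_POLLING_NRFLAG tells the scheduler that this idle loop polls
         * need_resched itself, so waking this CPU does not require a
         * reschedule IPI.
         */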
        set_thread_flag(TIF_POLLING_NRFLAG);

        /* endless idle loop with no priority at all */
        while (1) {
                while (!need_resched()) {
                        void (*idle)(void);

                        if (__get_cpu_var(cpu_idle_state))
                                __get_cpu_var(cpu_idle_state) = 0;

                        rmb();
                        idle = pm_idle;
                        if (!idle)
                                idle = default_idle;
                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        idle();
                }

                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}

/*
 * This uses the new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate the IPI otherwise needed to trigger checking of
 * need_resched.  We execute MONITOR against need_resched and enter an
 * optimized wait state through MWAIT.  Whenever someone changes
 * need_resched, we would be woken up from MWAIT (without an IPI).
 */
static void mwait_idle(void)
{
        local_irq_enable();

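        /*
         * Arm MONITOR on this thread's flags word, then re-check
         * need_resched before MWAITing so that a wakeup racing with the
         * MONITOR setup is not missed.
         */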
        while (!need_resched()) {
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                if (need_resched())
                        break;
                __mwait(0, 0);
        }
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
        static int printed;
        if (cpu_has(c, X86_FEATURE_MWAIT)) {
                /*
                 * Skip, if setup has overridden idle.
                 * One CPU supports mwait => all CPUs support mwait
                 */
                if (!pm_idle) {
                        if (!printed) {
                                printk("using mwait in idle threads.\n");
                                printed = 1;
                        }
                        pm_idle = mwait_idle;
                }
        }
}

static int __init idle_setup (char *str)
{
        if (!strncmp(str, "poll", 4)) {
                printk("using polling idle threads.\n");
                pm_idle = poll_idle;
        }

        boot_option_idle_override = 1;
        return 1;
}

__setup("idle=", idle_setup);

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned int fsindex,gsindex;
        unsigned int ds,cs,es;

        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                system_utsname.release,
                (int)strcspn(system_utsname.version, " "),
                system_utsname.version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
        printk_address(regs->rip);
        printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->rax, regs->rbx, regs->rcx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
               regs->rdx, regs->rsi, regs->rdi);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
               regs->rbp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
               regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
               regs->r13, regs->r14, regs->r15);

        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        asm("movq %%cr0, %0": "=r" (cr0));
        asm("movq %%cr2, %0": "=r" (cr2));
        asm("movq %%cr3, %0": "=r" (cr3));
        asm("movq %%cr4, %0": "=r" (cr4));

        printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
               fs,fsindex,gs,gsindex,shadowgs);
        printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
}

void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(&regs->rsp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        /*
         * Remove function-return probe instances associated with this task
         * and put them back on the free list. Do not insert an exit probe for
         * this function, it will be disabled by kprobe_flush_task if you do.
         */
        kprobe_flush_task(me);

        if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
        }
}

void flush_thread(void)
{
        struct task_struct *tsk = current;
        struct thread_info *t = current_thread_info();

        /*
         * Remove function-return probe instances associated with this task
         * and put them back on the free list. Do not insert an exit probe for
         * this function, it will be disabled by kprobe_flush_task if you do.
         */
        kprobe_flush_task(tsk);

        if (t->flags & _TIF_ABI_PENDING)
                t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state..
         */
        clear_fpu(tsk);
        clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                        dead_task->comm,
                                        dead_task->mm->context.ldt,
                                        dead_task->mm->context.size);
                        BUG();
                }
        }
}

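/*
 * Helpers for the 32bit TLS descriptors in the GDT: set_32bit_tls()
 * installs a 4GB, 32bit segment with the given base into one of the
 * thread's TLS slots, and read_32bit_tls() reads the base back out of
 * such a descriptor.  Used below for fs/gs bases that fit in 32 bits.
 */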
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct n_desc_struct *desc = (void *)t->thread.tls_array;
        desc += tls;
        desc->a = LDT_entry_a(&ud);
        desc->b = LDT_entry_b(&ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        struct desc_struct *desc = (void *)t->thread.tls_array;
        desc += tls;
        return desc->base0 |
                (((u32)desc->base1) << 16) |
                (((u32)desc->base2) << 24);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
                unsigned long unused,
        struct task_struct * p, struct pt_regs * regs)
{
        int err;
        struct pt_regs * childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;

        *childregs = *regs;

        childregs->rax = 0;
        childregs->rsp = rsp;
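        /*
         * An rsp of ~0UL means no user stack was supplied (presumably the
         * kernel-thread case); point the child's stack at its own pt_regs,
         * i.e. the top of its new kernel stack.
         */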
        if (rsp == ~0UL) {
                childregs->rsp = (unsigned long)childregs;
        }

        p->thread.rsp = (unsigned long) childregs;
        p->thread.rsp0 = (unsigned long) (childregs+1);
        p->thread.userrsp = me->thread.userrsp;

        set_ti_thread_flag(p->thread_info, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
        asm("mov %%es,%0" : "=m" (p->thread.es));
        asm("mov %%ds,%0" : "=m" (p->thread.ds));

        if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = ia32_child_tls(p, childregs);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread,r) set_debug(thread->debugreg ## r, r)

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 */
struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                                 *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);

        unlazy_fpu(prev_p);

        /*
         * Reload rsp0 (the kernel entry stack pointer) for the new task:
         */
        tss->rsp0 = next->rsp0;

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        asm volatile("mov %%es,%0" : "=m" (prev->es));
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);

        load_TLS(next, cpu);

        /*
         * Switch FS and GS.
         */
        {
                unsigned fsindex;
                asm volatile("movl %%fs,%0" : "=r" (fsindex));
                /* segment register != 0 always requires a reload.
                   also reload when it has changed.
                   when prev process used 64bit base always reload
                   to avoid an information leak. */
                if (unlikely(fsindex | next->fsindex | prev->fs)) {
                        loadsegment(fs, next->fsindex);
                        /* check if the user used a selector != 0
                         * if yes clear 64bit base, since overloaded base
                         * is always mapped to the Null selector
                         */
                        if (fsindex)
                                prev->fs = 0;
                }
                /* when next process has a 64bit base use it */
                if (next->fs)
                        wrmsrl(MSR_FS_BASE, next->fs);
                prev->fsindex = fsindex;
        }
        {
                unsigned gsindex;
                asm volatile("movl %%gs,%0" : "=r" (gsindex));
                if (unlikely(gsindex | next->gsindex | prev->gs)) {
                        load_gs_index(next->gsindex);
                        if (gsindex)
                                prev->gs = 0;
                }
                if (next->gs)
                        wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
                prev->gsindex = gsindex;
        }

        /*
         * Switch the PDA context.
         */
        prev->userrsp = read_pda(oldrsp);
        write_pda(oldrsp, next->userrsp);
        write_pda(pcurrent, next_p);
        write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);

        /*
         * Now maybe reload the debug registers
         */
        if (unlikely(next->debugreg7)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
                loaddebug(next, 2);
                loaddebug(next, 3);
                /* no 4 and 5 */
                loaddebug(next, 6);
                loaddebug(next, 7);
        }

        /*
         * Handle the IO bitmap
         */
        if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
                if (next->io_bitmap_ptr)
                        /*
                         * Copy the relevant range of the IO bitmap.
                         * Normally this is 128 bytes or less:
                         */
                        memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                                 max(prev->io_bitmap_max, next->io_bitmap_max));
                else {
                        /*
                         * Clear any possible leftover bits:
                         */
                        memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
                }
        }

        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs regs)
{
        long error;
        char * filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, &regs);
        if (error == 0) {
                task_lock(current);
                current->ptrace &= ~PT_DTRACE;
                task_unlock(current);
        }
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);

        /* TBD: overwrites user setup. Should have two bits.
           But 64bit processes have always behaved this way,
           so it's not too bad. The main problem is just that
           32bit children are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}

asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->rsp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
                    NULL, NULL);
}

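/*
 * get_wchan() walks the saved frame-pointer chain of a sleeping task and
 * returns the first return address that is not in the scheduler itself -
 * the "wait channel" the task is blocked in.
 */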
unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp,rip;
        int count = 0;

        if (!p || p == current || p->state==TASK_RUNNING)
                return 0;
        stack = (unsigned long)p->thread_info;
        if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.rsp);
        do {
                if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE)
                        return 0;
                rip = *(u64 *)(fp+8);
                if (!in_sched_functions(rip))
                        return rip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}

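/*
 * arch_prctl() back end for ARCH_SET_FS/ARCH_SET_GS and the GET variants:
 * bases that fit in 32 bits are installed as a GDT TLS descriptor (cheaper
 * to switch), larger bases are written straight to the FS/GS base MSRs.
 */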
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                asm volatile("movl %0,%%fs" :: "r" (0));
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit) {
                        rdmsrl(MSR_FS_BASE, base);
                } else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        rdmsrl(MSR_KERNEL_GS_BASE, base);
                } else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}

/*
 * Capture the user-space registers if the task is not currently running
 * in user space.
 */
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
        struct pt_regs *pp, ptregs;

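        /* The user-mode pt_regs sit at the top of the task's kernel stack,
           just below thread.rsp0, so step back one pt_regs from there. */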
        pp = (struct pt_regs *)(tsk->thread.rsp0);
        --pp;

        ptregs = *pp;
        ptregs.cs &= 0xffff;
        ptregs.ss &= 0xffff;

        elf_core_copy_regs(regs, &ptregs);

        return 1;
}

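/*
 * Randomize the initial user stack pointer by up to 8KB (when VA space
 * randomization is enabled) and keep it 16-byte aligned.
 */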
unsigned long arch_align_stack(unsigned long sp)
{
        if (randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}