blob: 7577f9d7a75d8373f1a79be83c4e30c63b867f19 [file] [log] [blame]
/*
 *  linux/arch/x86-64/kernel/process.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 *  $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
 */
15
/*
 * This file handles the architecture-dependent parts of process handling.
 */
19
20#include <stdarg.h>
21
Ashok Raj76e4f662005-06-25 14:55:00 -070022#include <linux/cpu.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/errno.h>
24#include <linux/sched.h>
25#include <linux/kernel.h>
26#include <linux/mm.h>
27#include <linux/elfcore.h>
28#include <linux/smp.h>
29#include <linux/slab.h>
30#include <linux/user.h>
31#include <linux/module.h>
32#include <linux/a.out.h>
33#include <linux/interrupt.h>
34#include <linux/delay.h>
35#include <linux/irq.h>
36#include <linux/ptrace.h>
37#include <linux/utsname.h>
38#include <linux/random.h>
Rusty Lynch73649da2005-06-23 00:09:23 -070039#include <linux/kprobes.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040
41#include <asm/uaccess.h>
42#include <asm/pgtable.h>
43#include <asm/system.h>
44#include <asm/io.h>
45#include <asm/processor.h>
46#include <asm/i387.h>
47#include <asm/mmu_context.h>
48#include <asm/pda.h>
49#include <asm/prctl.h>
50#include <asm/kdebug.h>
51#include <asm/desc.h>
52#include <asm/proto.h>
53#include <asm/ia32.h>
54
55asmlinkage extern void ret_from_fork(void);
56
57unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
58
59static atomic_t hlt_counter = ATOMIC_INIT(0);
60
61unsigned long boot_option_idle_override = 0;
62EXPORT_SYMBOL(boot_option_idle_override);
63
64/*
65 * Powermanagement idle function, if any..
66 */
67void (*pm_idle)(void);
68static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
69
70void disable_hlt(void)
71{
72 atomic_inc(&hlt_counter);
73}
74
75EXPORT_SYMBOL(disable_hlt);
76
77void enable_hlt(void)
78{
79 atomic_dec(&hlt_counter);
80}
81
82EXPORT_SYMBOL(enable_hlt);
83
84/*
85 * We use this if we don't have any better
86 * idle routine..
87 */
88void default_idle(void)
89{
90 if (!atomic_read(&hlt_counter)) {
91 local_irq_disable();
92 if (!need_resched())
93 safe_halt();
94 else
95 local_irq_enable();
96 }
97}
98
99/*
100 * On SMP it's slightly faster (but much more power-consuming!)
101 * to poll the ->need_resched flag instead of waiting for the
102 * cross-CPU IPI to arrive. Use this option with caution.
103 */
104static void poll_idle (void)
105{
106 int oldval;
107
108 local_irq_enable();
109
110 /*
111 * Deal with another CPU just having chosen a thread to
112 * run here:
113 */
114 oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
115
116 if (!oldval) {
117 set_thread_flag(TIF_POLLING_NRFLAG);
118 asm volatile(
119 "2:"
120 "testl %0,%1;"
121 "rep; nop;"
122 "je 2b;"
123 : :
124 "i" (_TIF_NEED_RESCHED),
125 "m" (current_thread_info()->flags));
126 } else {
127 set_need_resched();
128 }
129}
130
131void cpu_idle_wait(void)
132{
133 unsigned int cpu, this_cpu = get_cpu();
134 cpumask_t map;
135
136 set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
137 put_cpu();
138
139 cpus_clear(map);
140 for_each_online_cpu(cpu) {
141 per_cpu(cpu_idle_state, cpu) = 1;
142 cpu_set(cpu, map);
143 }
144
145 __get_cpu_var(cpu_idle_state) = 0;
146
147 wmb();
148 do {
149 ssleep(1);
150 for_each_online_cpu(cpu) {
151 if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
152 cpu_clear(cpu, map);
153 }
154 cpus_and(map, map, cpu_online_map);
155 } while (!cpus_empty(map));
156}
157EXPORT_SYMBOL_GPL(cpu_idle_wait);
158
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/*
 * Called from cpu_idle() once this CPU has been marked offline.
 * We don't actually take CPU down, just spin without interrupts.
 * Never returns.
 */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	for (;;)
		safe_halt();
}
#else
/* Without CPU hotplug support this must never be reached. */
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */
181
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182/*
183 * The idle thread. There's no useful work to be
184 * done, so just try to conserve power and have a
185 * low exit latency (ie sit in a loop waiting for
186 * somebody to say that they'd like to reschedule)
187 */
188void cpu_idle (void)
189{
190 /* endless idle loop with no priority at all */
191 while (1) {
192 while (!need_resched()) {
193 void (*idle)(void);
194
195 if (__get_cpu_var(cpu_idle_state))
196 __get_cpu_var(cpu_idle_state) = 0;
197
198 rmb();
199 idle = pm_idle;
200 if (!idle)
201 idle = default_idle;
Ashok Raj76e4f662005-06-25 14:55:00 -0700202 if (cpu_is_offline(smp_processor_id()))
203 play_dead();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204 idle();
205 }
206
207 schedule();
208 }
209}
210
211/*
212 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
213 * which can obviate IPI to trigger checking of need_resched.
214 * We execute MONITOR against need_resched and enter optimized wait state
215 * through MWAIT. Whenever someone changes need_resched, we would be woken
216 * up from MWAIT (without an IPI).
217 */
218static void mwait_idle(void)
219{
220 local_irq_enable();
221
222 if (!need_resched()) {
223 set_thread_flag(TIF_POLLING_NRFLAG);
224 do {
225 __monitor((void *)&current_thread_info()->flags, 0, 0);
226 if (need_resched())
227 break;
228 __mwait(0, 0);
229 } while (!need_resched());
230 clear_thread_flag(TIF_POLLING_NRFLAG);
231 }
232}
233
Ashok Raje6982c62005-06-25 14:54:58 -0700234void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235{
236 static int printed;
237 if (cpu_has(c, X86_FEATURE_MWAIT)) {
238 /*
239 * Skip, if setup has overridden idle.
240 * One CPU supports mwait => All CPUs supports mwait
241 */
242 if (!pm_idle) {
243 if (!printed) {
244 printk("using mwait in idle threads.\n");
245 printed = 1;
246 }
247 pm_idle = mwait_idle;
248 }
249 }
250}
251
252static int __init idle_setup (char *str)
253{
254 if (!strncmp(str, "poll", 4)) {
255 printk("using polling idle threads.\n");
256 pm_idle = poll_idle;
257 }
258
259 boot_option_idle_override = 1;
260 return 1;
261}
262
263__setup("idle=", idle_setup);
264
265/* Prints also some state that isn't saved in the pt_regs */
266void __show_regs(struct pt_regs * regs)
267{
268 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
269 unsigned int fsindex,gsindex;
270 unsigned int ds,cs,es;
271
272 printk("\n");
273 print_modules();
274 printk("Pid: %d, comm: %.20s %s %s\n",
275 current->pid, current->comm, print_tainted(), system_utsname.release);
276 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
277 printk_address(regs->rip);
278 printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
279 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
280 regs->rax, regs->rbx, regs->rcx);
281 printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
282 regs->rdx, regs->rsi, regs->rdi);
283 printk("RBP: %016lx R08: %016lx R09: %016lx\n",
284 regs->rbp, regs->r8, regs->r9);
285 printk("R10: %016lx R11: %016lx R12: %016lx\n",
286 regs->r10, regs->r11, regs->r12);
287 printk("R13: %016lx R14: %016lx R15: %016lx\n",
288 regs->r13, regs->r14, regs->r15);
289
290 asm("movl %%ds,%0" : "=r" (ds));
291 asm("movl %%cs,%0" : "=r" (cs));
292 asm("movl %%es,%0" : "=r" (es));
293 asm("movl %%fs,%0" : "=r" (fsindex));
294 asm("movl %%gs,%0" : "=r" (gsindex));
295
296 rdmsrl(MSR_FS_BASE, fs);
297 rdmsrl(MSR_GS_BASE, gs);
298 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
299
300 asm("movq %%cr0, %0": "=r" (cr0));
301 asm("movq %%cr2, %0": "=r" (cr2));
302 asm("movq %%cr3, %0": "=r" (cr3));
303 asm("movq %%cr4, %0": "=r" (cr4));
304
305 printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
306 fs,fsindex,gs,gsindex,shadowgs);
307 printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
308 printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
309}
310
311void show_regs(struct pt_regs *regs)
312{
313 __show_regs(regs);
314 show_trace(&regs->rsp);
315}
316
317/*
318 * Free current thread data structures etc..
319 */
320void exit_thread(void)
321{
322 struct task_struct *me = current;
323 struct thread_struct *t = &me->thread;
Rusty Lynch73649da2005-06-23 00:09:23 -0700324
325 /*
326 * Remove function-return probe instances associated with this task
327 * and put them back on the free list. Do not insert an exit probe for
328 * this function, it will be disabled by kprobe_flush_task if you do.
329 */
330 kprobe_flush_task(me);
331
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332 if (me->thread.io_bitmap_ptr) {
333 struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
334
335 kfree(t->io_bitmap_ptr);
336 t->io_bitmap_ptr = NULL;
337 /*
338 * Careful, clear this in the TSS too:
339 */
340 memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
341 t->io_bitmap_max = 0;
342 put_cpu();
343 }
344}
345
346void flush_thread(void)
347{
348 struct task_struct *tsk = current;
349 struct thread_info *t = current_thread_info();
350
Rusty Lynch73649da2005-06-23 00:09:23 -0700351 /*
352 * Remove function-return probe instances associated with this task
353 * and put them back on the free list. Do not insert an exit probe for
354 * this function, it will be disabled by kprobe_flush_task if you do.
355 */
356 kprobe_flush_task(tsk);
357
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358 if (t->flags & _TIF_ABI_PENDING)
359 t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
360
361 tsk->thread.debugreg0 = 0;
362 tsk->thread.debugreg1 = 0;
363 tsk->thread.debugreg2 = 0;
364 tsk->thread.debugreg3 = 0;
365 tsk->thread.debugreg6 = 0;
366 tsk->thread.debugreg7 = 0;
367 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
368 /*
369 * Forget coprocessor state..
370 */
371 clear_fpu(tsk);
372 clear_used_math();
373}
374
375void release_thread(struct task_struct *dead_task)
376{
377 if (dead_task->mm) {
378 if (dead_task->mm->context.size) {
379 printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
380 dead_task->comm,
381 dead_task->mm->context.ldt,
382 dead_task->mm->context.size);
383 BUG();
384 }
385 }
386}
387
388static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
389{
390 struct user_desc ud = {
391 .base_addr = addr,
392 .limit = 0xfffff,
393 .seg_32bit = 1,
394 .limit_in_pages = 1,
395 .useable = 1,
396 };
397 struct n_desc_struct *desc = (void *)t->thread.tls_array;
398 desc += tls;
399 desc->a = LDT_entry_a(&ud);
400 desc->b = LDT_entry_b(&ud);
401}
402
403static inline u32 read_32bit_tls(struct task_struct *t, int tls)
404{
405 struct desc_struct *desc = (void *)t->thread.tls_array;
406 desc += tls;
407 return desc->base0 |
408 (((u32)desc->base1) << 16) |
409 (((u32)desc->base2) << 24);
410}
411
/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.  The only work done here is
 * unlazy_fpu() on @tsk.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}
420
421int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
422 unsigned long unused,
423 struct task_struct * p, struct pt_regs * regs)
424{
425 int err;
426 struct pt_regs * childregs;
427 struct task_struct *me = current;
428
429 childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
430
431 *childregs = *regs;
432
433 childregs->rax = 0;
434 childregs->rsp = rsp;
435 if (rsp == ~0UL) {
436 childregs->rsp = (unsigned long)childregs;
437 }
438
439 p->thread.rsp = (unsigned long) childregs;
440 p->thread.rsp0 = (unsigned long) (childregs+1);
441 p->thread.userrsp = me->thread.userrsp;
442
443 set_ti_thread_flag(p->thread_info, TIF_FORK);
444
445 p->thread.fs = me->thread.fs;
446 p->thread.gs = me->thread.gs;
447
H. J. Lufd51f662005-05-01 08:58:48 -0700448 asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
449 asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
450 asm("mov %%es,%0" : "=m" (p->thread.es));
451 asm("mov %%ds,%0" : "=m" (p->thread.ds));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452
453 if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
454 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
455 if (!p->thread.io_bitmap_ptr) {
456 p->thread.io_bitmap_max = 0;
457 return -ENOMEM;
458 }
459 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES);
460 }
461
462 /*
463 * Set a new TLS for the child thread?
464 */
465 if (clone_flags & CLONE_SETTLS) {
466#ifdef CONFIG_IA32_EMULATION
467 if (test_thread_flag(TIF_IA32))
468 err = ia32_child_tls(p, childregs);
469 else
470#endif
471 err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
472 if (err)
473 goto out;
474 }
475 err = 0;
476out:
477 if (err && p->thread.io_bitmap_ptr) {
478 kfree(p->thread.io_bitmap_ptr);
479 p->thread.io_bitmap_max = 0;
480 }
481 return err;
482}
483
484/*
Andrea Arcangeliffaa8bd2005-06-27 14:36:36 -0700485 * This function selects if the context switch from prev to next
486 * has to tweak the TSC disable bit in the cr4.
487 */
488static inline void disable_tsc(struct task_struct *prev_p,
489 struct task_struct *next_p)
490{
491 struct thread_info *prev, *next;
492
493 /*
494 * gcc should eliminate the ->thread_info dereference if
495 * has_secure_computing returns 0 at compile time (SECCOMP=n).
496 */
497 prev = prev_p->thread_info;
498 next = next_p->thread_info;
499
500 if (has_secure_computing(prev) || has_secure_computing(next)) {
501 /* slow path here */
502 if (has_secure_computing(prev) &&
503 !has_secure_computing(next)) {
504 write_cr4(read_cr4() & ~X86_CR4_TSD);
505 } else if (!has_secure_computing(prev) &&
506 has_secure_computing(next))
507 write_cr4(read_cr4() | X86_CR4_TSD);
508 }
509}
510
/*
 * This special macro can be used to load a debugging register:
 * it hands thread->debugreg<r> and the register number r to
 * set_debug().  The thread argument is parenthesized so that any
 * pointer expression (not just a plain identifier) expands correctly.
 */
#define loaddebug(thread, r) set_debug((thread)->debugreg ## r, r)
515
516/*
517 * switch_to(x,y) should switch tasks from x to y.
518 *
519 * This could still be optimized:
520 * - fold all the options into a flag word and test it with a single test.
521 * - could test fs/gs bitsliced
522 */
523struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
524{
525 struct thread_struct *prev = &prev_p->thread,
526 *next = &next_p->thread;
527 int cpu = smp_processor_id();
528 struct tss_struct *tss = &per_cpu(init_tss, cpu);
529
530 unlazy_fpu(prev_p);
531
532 /*
533 * Reload esp0, LDT and the page table pointer:
534 */
535 tss->rsp0 = next->rsp0;
536
537 /*
538 * Switch DS and ES.
539 * This won't pick up thread selector changes, but I guess that is ok.
540 */
H. J. Lufd51f662005-05-01 08:58:48 -0700541 asm volatile("mov %%es,%0" : "=m" (prev->es));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542 if (unlikely(next->es | prev->es))
543 loadsegment(es, next->es);
544
H. J. Lufd51f662005-05-01 08:58:48 -0700545 asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546 if (unlikely(next->ds | prev->ds))
547 loadsegment(ds, next->ds);
548
549 load_TLS(next, cpu);
550
551 /*
552 * Switch FS and GS.
553 */
554 {
555 unsigned fsindex;
556 asm volatile("movl %%fs,%0" : "=r" (fsindex));
557 /* segment register != 0 always requires a reload.
558 also reload when it has changed.
559 when prev process used 64bit base always reload
560 to avoid an information leak. */
561 if (unlikely(fsindex | next->fsindex | prev->fs)) {
562 loadsegment(fs, next->fsindex);
563 /* check if the user used a selector != 0
564 * if yes clear 64bit base, since overloaded base
565 * is always mapped to the Null selector
566 */
567 if (fsindex)
568 prev->fs = 0;
569 }
570 /* when next process has a 64bit base use it */
571 if (next->fs)
572 wrmsrl(MSR_FS_BASE, next->fs);
573 prev->fsindex = fsindex;
574 }
575 {
576 unsigned gsindex;
577 asm volatile("movl %%gs,%0" : "=r" (gsindex));
578 if (unlikely(gsindex | next->gsindex | prev->gs)) {
579 load_gs_index(next->gsindex);
580 if (gsindex)
581 prev->gs = 0;
582 }
583 if (next->gs)
584 wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
585 prev->gsindex = gsindex;
586 }
587
588 /*
589 * Switch the PDA context.
590 */
591 prev->userrsp = read_pda(oldrsp);
592 write_pda(oldrsp, next->userrsp);
593 write_pda(pcurrent, next_p);
594 write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
595
596 /*
597 * Now maybe reload the debug registers
598 */
599 if (unlikely(next->debugreg7)) {
600 loaddebug(next, 0);
601 loaddebug(next, 1);
602 loaddebug(next, 2);
603 loaddebug(next, 3);
604 /* no 4 and 5 */
605 loaddebug(next, 6);
606 loaddebug(next, 7);
607 }
608
609
610 /*
611 * Handle the IO bitmap
612 */
613 if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
614 if (next->io_bitmap_ptr)
615 /*
616 * Copy the relevant range of the IO bitmap.
617 * Normally this is 128 bytes or less:
618 */
619 memcpy(tss->io_bitmap, next->io_bitmap_ptr,
620 max(prev->io_bitmap_max, next->io_bitmap_max));
621 else {
622 /*
623 * Clear any possible leftover bits:
624 */
625 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
626 }
627 }
628
Andrea Arcangeliffaa8bd2005-06-27 14:36:36 -0700629 disable_tsc(prev_p, next_p);
630
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631 return prev_p;
632}
633
634/*
635 * sys_execve() executes a new program.
636 */
637asmlinkage
638long sys_execve(char __user *name, char __user * __user *argv,
639 char __user * __user *envp, struct pt_regs regs)
640{
641 long error;
642 char * filename;
643
644 filename = getname(name);
645 error = PTR_ERR(filename);
646 if (IS_ERR(filename))
647 return error;
648 error = do_execve(filename, argv, envp, &regs);
649 if (error == 0) {
650 task_lock(current);
651 current->ptrace &= ~PT_DTRACE;
652 task_unlock(current);
653 }
654 putname(filename);
655 return error;
656}
657
658void set_personality_64bit(void)
659{
660 /* inherit personality from parent */
661
662 /* Make sure to be in 64bit mode */
663 clear_thread_flag(TIF_IA32);
664
665 /* TBD: overwrites user setup. Should have two bits.
666 But 64bit processes have always behaved this way,
667 so it's not too bad. The main problem is just that
668 32bit childs are affected again. */
669 current->personality &= ~READ_IMPLIES_EXEC;
670}
671
672asmlinkage long sys_fork(struct pt_regs *regs)
673{
674 return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
675}
676
677asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
678{
679 if (!newsp)
680 newsp = regs->rsp;
681 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
682}
683
684/*
685 * This is trivial, and on the face of it looks like it
686 * could equally well be done in user mode.
687 *
688 * Not so, for quite unobvious reasons - register pressure.
689 * In user mode vfork() cannot have a stack frame, and if
690 * done by calling the "clone()" system call directly, you
691 * do not have enough call-clobbered registers to hold all
692 * the information you need.
693 */
694asmlinkage long sys_vfork(struct pt_regs *regs)
695{
696 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
697 NULL, NULL);
698}
699
700unsigned long get_wchan(struct task_struct *p)
701{
702 unsigned long stack;
703 u64 fp,rip;
704 int count = 0;
705
706 if (!p || p == current || p->state==TASK_RUNNING)
707 return 0;
708 stack = (unsigned long)p->thread_info;
709 if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
710 return 0;
711 fp = *(u64 *)(p->thread.rsp);
712 do {
713 if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE)
714 return 0;
715 rip = *(u64 *)(fp+8);
716 if (!in_sched_functions(rip))
717 return rip;
718 fp = *(u64 *)fp;
719 } while (count++ < 16);
720 return 0;
721}
722
723long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
724{
725 int ret = 0;
726 int doit = task == current;
727 int cpu;
728
729 switch (code) {
730 case ARCH_SET_GS:
Suresh Siddha84929802005-06-21 17:14:32 -0700731 if (addr >= TASK_SIZE_OF(task))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732 return -EPERM;
733 cpu = get_cpu();
734 /* handle small bases via the GDT because that's faster to
735 switch. */
736 if (addr <= 0xffffffff) {
737 set_32bit_tls(task, GS_TLS, addr);
738 if (doit) {
739 load_TLS(&task->thread, cpu);
740 load_gs_index(GS_TLS_SEL);
741 }
742 task->thread.gsindex = GS_TLS_SEL;
743 task->thread.gs = 0;
744 } else {
745 task->thread.gsindex = 0;
746 task->thread.gs = addr;
747 if (doit) {
748 load_gs_index(0);
749 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
750 }
751 }
752 put_cpu();
753 break;
754 case ARCH_SET_FS:
755 /* Not strictly needed for fs, but do it for symmetry
756 with gs */
Suresh Siddha84929802005-06-21 17:14:32 -0700757 if (addr >= TASK_SIZE_OF(task))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758 return -EPERM;
759 cpu = get_cpu();
760 /* handle small bases via the GDT because that's faster to
761 switch. */
762 if (addr <= 0xffffffff) {
763 set_32bit_tls(task, FS_TLS, addr);
764 if (doit) {
765 load_TLS(&task->thread, cpu);
766 asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL));
767 }
768 task->thread.fsindex = FS_TLS_SEL;
769 task->thread.fs = 0;
770 } else {
771 task->thread.fsindex = 0;
772 task->thread.fs = addr;
773 if (doit) {
774 /* set the selector to 0 to not confuse
775 __switch_to */
776 asm volatile("movl %0,%%fs" :: "r" (0));
777 ret = checking_wrmsrl(MSR_FS_BASE, addr);
778 }
779 }
780 put_cpu();
781 break;
782 case ARCH_GET_FS: {
783 unsigned long base;
784 if (task->thread.fsindex == FS_TLS_SEL)
785 base = read_32bit_tls(task, FS_TLS);
786 else if (doit) {
787 rdmsrl(MSR_FS_BASE, base);
788 } else
789 base = task->thread.fs;
790 ret = put_user(base, (unsigned long __user *)addr);
791 break;
792 }
793 case ARCH_GET_GS: {
794 unsigned long base;
795 if (task->thread.gsindex == GS_TLS_SEL)
796 base = read_32bit_tls(task, GS_TLS);
797 else if (doit) {
798 rdmsrl(MSR_KERNEL_GS_BASE, base);
799 } else
800 base = task->thread.gs;
801 ret = put_user(base, (unsigned long __user *)addr);
802 break;
803 }
804
805 default:
806 ret = -EINVAL;
807 break;
808 }
809
810 return ret;
811}
812
813long sys_arch_prctl(int code, unsigned long addr)
814{
815 return do_arch_prctl(current, code, addr);
816}
817
818/*
819 * Capture the user space registers if the task is not running (in user space)
820 */
821int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
822{
823 struct pt_regs *pp, ptregs;
824
825 pp = (struct pt_regs *)(tsk->thread.rsp0);
826 --pp;
827
828 ptregs = *pp;
829 ptregs.cs &= 0xffff;
830 ptregs.ss &= 0xffff;
831
832 elf_core_copy_regs(regs, &ptregs);
833
834 return 1;
835}
836
837unsigned long arch_align_stack(unsigned long sp)
838{
839 if (randomize_va_space)
840 sp -= get_random_int() % 8192;
841 return sp & ~0xf;
842}