/*
 * linux/arch/x86-64/kernel/process.c
 *
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 * Andi Kleen.
 *
 * CPU hotplug support - ashok.raj@intel.com
 * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/kprobes.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/kdebug.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static atomic_t hlt_counter = ATOMIC_INIT(0);

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any.
 */
void (*pm_idle)(void);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

void disable_hlt(void)
{
        atomic_inc(&hlt_counter);
}

EXPORT_SYMBOL(disable_hlt);

void enable_hlt(void)
{
        atomic_dec(&hlt_counter);
}

EXPORT_SYMBOL(enable_hlt);

/*
 * We use this if we don't have any better
 * idle routine..
 */
void default_idle(void)
{
        if (!atomic_read(&hlt_counter)) {
                local_irq_disable();
                if (!need_resched())
                        safe_halt();
                else
                        local_irq_enable();
        }
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle (void)
{
        int oldval;

        local_irq_enable();

        /*
         * Deal with another CPU just having chosen a thread to
         * run here:
         */
        oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);

        if (!oldval) {
                set_thread_flag(TIF_POLLING_NRFLAG);
                asm volatile(
                        "2:"
                        "testl %0,%1;"
                        "rep; nop;"
                        "je 2b;"
                        : :
                        "i" (_TIF_NEED_RESCHED),
                        "m" (current_thread_info()->flags));
                clear_thread_flag(TIF_POLLING_NRFLAG);
        } else {
                set_need_resched();
        }
}

void cpu_idle_wait(void)
{
        unsigned int cpu, this_cpu = get_cpu();
        cpumask_t map;

        set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
        put_cpu();

        cpus_clear(map);
        for_each_online_cpu(cpu) {
                per_cpu(cpu_idle_state, cpu) = 1;
                cpu_set(cpu, map);
        }

        __get_cpu_var(cpu_idle_state) = 0;

        wmb();
        do {
                ssleep(1);
                for_each_online_cpu(cpu) {
                        if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
                                cpu_clear(cpu, map);
                }
                cpus_and(map, map, cpu_online_map);
        } while (!cpus_empty(map));
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
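
/*
 * Usage sketch (illustrative only, not a caller lifted from the tree):
 * cpu_idle_wait() exists so that code which replaces pm_idle can be sure
 * no CPU is still running the old idle handler before it goes away.
 * A hypothetical caller (my_idle/install_my_idle are made-up names)
 * might do roughly:
 *
 *	static void my_idle(void);
 *
 *	void install_my_idle(void)
 *	{
 *		pm_idle = my_idle;
 *		cpu_idle_wait();	// every CPU has left the old handler
 *	}
 */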

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We don't actually take the CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
        idle_task_exit();
        wbinvd();
        mb();
        /* Ack it */
        __get_cpu_var(cpu_state) = CPU_DEAD;

        while (1)
                safe_halt();
}
#else
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle (void)
{
        /* endless idle loop with no priority at all */
        while (1) {
                while (!need_resched()) {
                        void (*idle)(void);

                        if (__get_cpu_var(cpu_idle_state))
                                __get_cpu_var(cpu_idle_state) = 0;

                        rmb();
                        idle = pm_idle;
                        if (!idle)
                                idle = default_idle;
                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        idle();
                }

                schedule();
        }
}

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate the IPI used to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 */
static void mwait_idle(void)
{
        local_irq_enable();

        if (!need_resched()) {
                set_thread_flag(TIF_POLLING_NRFLAG);
                do {
                        __monitor((void *)&current_thread_info()->flags, 0, 0);
                        if (need_resched())
                                break;
                        __mwait(0, 0);
                } while (!need_resched());
                clear_thread_flag(TIF_POLLING_NRFLAG);
        }
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
        static int printed;
        if (cpu_has(c, X86_FEATURE_MWAIT)) {
                /*
                 * Skip if setup has overridden idle.
                 * One CPU supports mwait => all CPUs support mwait.
                 */
                if (!pm_idle) {
                        if (!printed) {
                                printk("using mwait in idle threads.\n");
                                printed = 1;
                        }
                        pm_idle = mwait_idle;
                }
        }
}

static int __init idle_setup (char *str)
{
        if (!strncmp(str, "poll", 4)) {
                printk("using polling idle threads.\n");
                pm_idle = poll_idle;
        }

        boot_option_idle_override = 1;
        return 1;
}

__setup("idle=", idle_setup);
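
/*
 * Usage note (derived only from the parser above, not from other
 * documentation): passing "idle=poll" on the kernel command line makes
 * poll_idle the pm_idle handler; any "idle=" value also sets
 * boot_option_idle_override so other code can tell that the user forced
 * an idle-routine choice.  A hypothetical boot entry might look like:
 *
 *	kernel /vmlinuz root=/dev/sda1 idle=poll
 *
 * (the kernel path and root= value are placeholders).
 */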

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned int fsindex,gsindex;
        unsigned int ds,cs,es;

        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                system_utsname.release,
                (int)strcspn(system_utsname.version, " "),
                system_utsname.version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
        printk_address(regs->rip);
        printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->rax, regs->rbx, regs->rcx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
               regs->rdx, regs->rsi, regs->rdi);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
               regs->rbp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
               regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
               regs->r13, regs->r14, regs->r15);

        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        asm("movq %%cr0, %0": "=r" (cr0));
        asm("movq %%cr2, %0": "=r" (cr2));
        asm("movq %%cr3, %0": "=r" (cr3));
        asm("movq %%cr4, %0": "=r" (cr4));

        printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
               fs,fsindex,gs,gsindex,shadowgs);
        printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
}

void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(&regs->rsp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        /*
         * Remove function-return probe instances associated with this task
         * and put them back on the free list. Do not insert an exit probe for
         * this function, it will be disabled by kprobe_flush_task if you do.
         */
        kprobe_flush_task(me);

        if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
        }
}

void flush_thread(void)
{
        struct task_struct *tsk = current;
        struct thread_info *t = current_thread_info();

        /*
         * Remove function-return probe instances associated with this task
         * and put them back on the free list. Do not insert an exit probe for
         * this function, it will be disabled by kprobe_flush_task if you do.
         */
        kprobe_flush_task(tsk);

        if (t->flags & _TIF_ABI_PENDING)
                t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state..
         */
        clear_fpu(tsk);
        clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                               dead_task->comm,
                               dead_task->mm->context.ldt,
                               dead_task->mm->context.size);
                        BUG();
                }
        }
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct n_desc_struct *desc = (void *)t->thread.tls_array;
        desc += tls;
        desc->a = LDT_entry_a(&ud);
        desc->b = LDT_entry_b(&ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        struct desc_struct *desc = (void *)t->thread.tls_array;
        desc += tls;
        return desc->base0 |
                (((u32)desc->base1) << 16) |
                (((u32)desc->base2) << 24);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
                unsigned long unused,
                struct task_struct * p, struct pt_regs * regs)
{
        int err;
        struct pt_regs * childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;

        *childregs = *regs;

        childregs->rax = 0;
        childregs->rsp = rsp;
        if (rsp == ~0UL) {
                childregs->rsp = (unsigned long)childregs;
        }

        p->thread.rsp = (unsigned long) childregs;
        p->thread.rsp0 = (unsigned long) (childregs+1);
        p->thread.userrsp = me->thread.userrsp;

        set_ti_thread_flag(p->thread_info, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
        asm("mov %%es,%0" : "=m" (p->thread.es));
        asm("mov %%ds,%0" : "=m" (p->thread.ds));

        if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = ia32_child_tls(p, childregs);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

/*
 * This function selects if the context switch from prev to next
 * has to tweak the TSC disable bit in the cr4.
 */
static inline void disable_tsc(struct task_struct *prev_p,
                               struct task_struct *next_p)
{
        struct thread_info *prev, *next;

        /*
         * gcc should eliminate the ->thread_info dereference if
         * has_secure_computing returns 0 at compile time (SECCOMP=n).
         */
        prev = prev_p->thread_info;
        next = next_p->thread_info;

        if (has_secure_computing(prev) || has_secure_computing(next)) {
                /* slow path here */
                if (has_secure_computing(prev) &&
                    !has_secure_computing(next)) {
                        write_cr4(read_cr4() & ~X86_CR4_TSD);
                } else if (!has_secure_computing(prev) &&
                           has_secure_computing(next))
                        write_cr4(read_cr4() | X86_CR4_TSD);
        }
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread,r) set_debug(thread->debugreg ## r, r)

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 */
struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                             *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);

        unlazy_fpu(prev_p);

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        tss->rsp0 = next->rsp0;

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        asm volatile("mov %%es,%0" : "=m" (prev->es));
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);

        load_TLS(next, cpu);

        /*
         * Switch FS and GS.
         */
        {
                unsigned fsindex;
                asm volatile("movl %%fs,%0" : "=r" (fsindex));
                /* segment register != 0 always requires a reload.
                   also reload when it has changed.
                   when prev process used 64bit base always reload
                   to avoid an information leak. */
                if (unlikely(fsindex | next->fsindex | prev->fs)) {
                        loadsegment(fs, next->fsindex);
                        /* check if the user used a selector != 0
                         * if yes clear 64bit base, since overloaded base
                         * is always mapped to the Null selector
                         */
                        if (fsindex)
                                prev->fs = 0;
                }
                /* when next process has a 64bit base use it */
                if (next->fs)
                        wrmsrl(MSR_FS_BASE, next->fs);
                prev->fsindex = fsindex;
        }
        {
                unsigned gsindex;
                asm volatile("movl %%gs,%0" : "=r" (gsindex));
                if (unlikely(gsindex | next->gsindex | prev->gs)) {
                        load_gs_index(next->gsindex);
                        if (gsindex)
                                prev->gs = 0;
                }
                if (next->gs)
                        wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
                prev->gsindex = gsindex;
        }

        /*
         * Switch the PDA context.
         */
        prev->userrsp = read_pda(oldrsp);
        write_pda(oldrsp, next->userrsp);
        write_pda(pcurrent, next_p);
        write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);

        /*
         * Now maybe reload the debug registers
         */
        if (unlikely(next->debugreg7)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
                loaddebug(next, 2);
                loaddebug(next, 3);
                /* no 4 and 5 */
                loaddebug(next, 6);
                loaddebug(next, 7);
        }


        /*
         * Handle the IO bitmap
         */
        if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
                if (next->io_bitmap_ptr)
                        /*
                         * Copy the relevant range of the IO bitmap.
                         * Normally this is 128 bytes or less:
                         */
                        memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                               max(prev->io_bitmap_max, next->io_bitmap_max));
                else {
                        /*
                         * Clear any possible leftover bits:
                         */
                        memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
                }
        }

        disable_tsc(prev_p, next_p);

        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs regs)
{
        long error;
        char * filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, &regs);
        if (error == 0) {
                task_lock(current);
                current->ptrace &= ~PT_DTRACE;
                task_unlock(current);
        }
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);

        /* TBD: overwrites user setup. Should have two bits.
           But 64bit processes have always behaved this way,
           so it's not too bad. The main problem is just that
           32bit children are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}

asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->rsp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
                       NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp,rip;
        int count = 0;

        if (!p || p == current || p->state==TASK_RUNNING)
                return 0;
        stack = (unsigned long)p->thread_info;
        if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.rsp);
        do {
                if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE)
                        return 0;
                rip = *(u64 *)(fp+8);
                if (!in_sched_functions(rip))
                        return rip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                asm volatile("movl %0,%%fs" :: "r" (0));
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit) {
                        rdmsrl(MSR_FS_BASE, base);
                } else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        rdmsrl(MSR_KERNEL_GS_BASE, base);
                } else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}
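
/*
 * Userspace sketch (illustrative only; not part of this file): a 64-bit
 * process reaches do_arch_prctl() through the arch_prctl(2) system call.
 * Assuming the ARCH_GET_FS code from <asm/prctl.h> and the generic
 * syscall(2) wrapper, a minimal caller might look like:
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		unsigned long fsbase = 0;
 *
 *		// Read the current FS base (the C library's TLS pointer).
 *		if (syscall(SYS_arch_prctl, ARCH_GET_FS, &fsbase) == 0)
 *			printf("FS base: %#lx\n", fsbase);
 *		return 0;
 *	}
 *
 * Setting a new FS base with ARCH_SET_FS also goes through this path, but
 * would pull the rug out from under the C library's TLS, so the read-only
 * query is the safer demonstration.
 */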

/*
 * Capture the user space registers if the task is not running (in user space)
 */
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
        struct pt_regs *pp, ptregs;

        pp = (struct pt_regs *)(tsk->thread.rsp0);
        --pp;

        ptregs = *pp;
        ptregs.cs &= 0xffff;
        ptregs.ss &= 0xffff;

        elf_core_copy_regs(regs, &ptregs);

        return 1;
}

unsigned long arch_align_stack(unsigned long sp)
{
        if (randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}