/*
 * linux/arch/x86-64/kernel/process.c
 *
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 *	Andi Kleen.
 *
 * CPU hotplug support - ashok.raj@intel.com
 * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/kprobes.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/kdebug.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static atomic_t hlt_counter = ATOMIC_INIT(0);

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any..
 */
void (*pm_idle)(void);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

void disable_hlt(void)
{
	atomic_inc(&hlt_counter);
}

EXPORT_SYMBOL(disable_hlt);

void enable_hlt(void)
{
	atomic_dec(&hlt_counter);
}

EXPORT_SYMBOL(enable_hlt);

/*
 * We use this if we don't have any better
 * idle routine..
 */
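/*
 * Clearing TIF_POLLING_NRFLAG tells remote CPUs they must send a
 * resched IPI to wake us; the irq-disable / recheck / safe_halt()
 * sequence below then closes the race window between the final
 * need_resched() test and the HLT instruction.
 */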
void default_idle(void)
{
	local_irq_enable();

	if (!atomic_read(&hlt_counter)) {
		clear_thread_flag(TIF_POLLING_NRFLAG);
		smp_mb__after_clear_bit();
		while (!need_resched()) {
			local_irq_disable();
			if (!need_resched())
				safe_halt();
			else
				local_irq_enable();
		}
		set_thread_flag(TIF_POLLING_NRFLAG);
	} else {
		while (!need_resched())
			cpu_relax();
	}
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle (void)
{
	local_irq_enable();

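	/*
	 * The "rep; nop" below is the PAUSE hint: it tells the CPU we
	 * are in a spin-wait loop, which saves power and yields
	 * resources to an SMT sibling.
	 */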
	asm volatile(
		"2:"
		"testl %0,%1;"
		"rep; nop;"
		"je 2b;"
		: :
		"i" (_TIF_NEED_RESCHED),
		"m" (current_thread_info()->flags));
}

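/*
 * cpu_idle_wait - make sure no CPU is still executing a stale idle
 * routine.  Each CPU's cpu_idle_state flag is set here and cleared by
 * the idle loop itself, so once every flag has been consumed a newly
 * installed pm_idle pointer is guaranteed to be seen everywhere.
 */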
void cpu_idle_wait(void)
{
	unsigned int cpu, this_cpu = get_cpu();
	cpumask_t map;

	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
	put_cpu();

	cpus_clear(map);
	for_each_online_cpu(cpu) {
		per_cpu(cpu_idle_state, cpu) = 1;
		cpu_set(cpu, map);
	}

	__get_cpu_var(cpu_idle_state) = 0;

	wmb();
	do {
		ssleep(1);
		for_each_online_cpu(cpu) {
			if (cpu_isset(cpu, map) &&
					!per_cpu(cpu_idle_state, cpu))
				cpu_clear(cpu, map);
		}
		cpus_and(map, map, cpu_online_map);
	} while (!cpus_empty(map));
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We don't actually take the CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	while (1)
		safe_halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle (void)
{
	set_thread_flag(TIF_POLLING_NRFLAG);

	/* endless idle loop with no priority at all */
	while (1) {
		while (!need_resched()) {
			void (*idle)(void);

			if (__get_cpu_var(cpu_idle_state))
				__get_cpu_var(cpu_idle_state) = 0;

			rmb();
			idle = pm_idle;
			if (!idle)
				idle = default_idle;
			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			idle();
		}

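		/*
		 * Idle runs with preemption disabled; enable it
		 * (without an immediate resched check, since we call
		 * schedule() ourselves) just around the call into
		 * the scheduler.
		 */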
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/*
 * This uses the new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate the IPI otherwise needed to trigger a need_resched
 * check. We execute MONITOR against need_resched and enter an optimized
 * wait state through MWAIT. Whenever someone changes need_resched, we
 * would be woken up from MWAIT (without an IPI).
 */
static void mwait_idle(void)
{
	local_irq_enable();

	while (!need_resched()) {
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (need_resched())
			break;
		__mwait(0, 0);
	}
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
	static int printed;
	if (cpu_has(c, X86_FEATURE_MWAIT)) {
		/*
		 * Skip, if setup has overridden idle.
		 * One CPU supports mwait => all CPUs support mwait.
		 */
		if (!pm_idle) {
			if (!printed) {
				printk("using mwait in idle threads.\n");
				printed = 1;
			}
			pm_idle = mwait_idle;
		}
	}
}

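/*
 * Parse the "idle=" boot option; "idle=poll" forces the polling idle
 * loop even on CPUs where mwait is available.
 */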
static int __init idle_setup (char *str)
{
	if (!strncmp(str, "poll", 4)) {
		printk("using polling idle threads.\n");
		pm_idle = poll_idle;
	}

	boot_option_idle_override = 1;
	return 1;
}

__setup("idle=", idle_setup);

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		system_utsname.release,
		(int)strcspn(system_utsname.version, " "),
		system_utsname.version);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
	printk_address(regs->rip);
	printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
		regs->eflags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
		regs->rax, regs->rbx, regs->rcx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
		regs->rdx, regs->rsi, regs->rdi);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
		regs->rbp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
		regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
		regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	asm("movq %%cr0, %0": "=r" (cr0));
	asm("movq %%cr2, %0": "=r" (cr2));
	asm("movq %%cr3, %0": "=r" (cr3));
	asm("movq %%cr4, %0": "=r" (cr4));

	printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
		fs, fsindex, gs, gsindex, shadowgs);
	printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
}

void show_regs(struct pt_regs *regs)
{
	printk("CPU %d:", smp_processor_id());
	__show_regs(regs);
	show_trace(&regs->rsp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	/*
	 * Remove function-return probe instances associated with this task
	 * and put them back on the free list. Do not insert an exit probe
	 * for this function; it will be disabled by kprobe_flush_task if
	 * you do.
	 */
	kprobe_flush_task(me);

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

void flush_thread(void)
{
	struct task_struct *tsk = current;
	struct thread_info *t = current_thread_info();

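	/*
	 * _TIF_ABI_PENDING is set during exec when the new binary's
	 * ABI differs from the current one; the xor below clears that
	 * flag and flips TIF_IA32 in a single step.
	 */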
	if (t->flags & _TIF_ABI_PENDING)
		t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
				dead_task->comm,
				dead_task->mm->context.ldt,
				dead_task->mm->context.size);
			BUG();
		}
	}
}

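/*
 * The TLS helpers below stash a 32-bit segment base in one of the GDT
 * TLS slots: bases below 4GB are cheaper to switch through the GDT
 * than through the FS/GS base MSRs.
 */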
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct n_desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	desc->a = LDT_entry_a(&ud);
	desc->b = LDT_entry_b(&ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	struct desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	return desc->base0 |
		(((u32)desc->base1) << 16) |
		(((u32)desc->base2) << 24);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
		unsigned long unused,
		struct task_struct * p, struct pt_regs * regs)
{
	int err;
	struct pt_regs * childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
	*childregs = *regs;

	childregs->rax = 0;
	childregs->rsp = rsp;
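	/* An rsp of ~0UL marks a kernel thread: point the child's stack
	   at its own pt_regs at the top of the new kernel stack. */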
	if (rsp == ~0UL)
		childregs->rsp = (unsigned long)childregs;

	p->thread.rsp = (unsigned long) childregs;
	p->thread.rsp0 = (unsigned long) (childregs+1);
	p->thread.userrsp = me->thread.userrsp;

	set_ti_thread_flag(p->thread_info, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
	asm("mov %%es,%0" : "=m" (p->thread.es));
	asm("mov %%ds,%0" : "=m" (p->thread.ds));

	if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
			IO_BITMAP_BYTES);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = ia32_child_tls(p, childregs);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread,r) set_debug(thread->debugreg ## r, r)

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	unlazy_fpu(prev_p);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	tss->rsp0 = next->rsp0;

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	asm volatile("mov %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_TLS(next, cpu);

	/*
	 * Switch FS and GS.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=r" (fsindex));
		/* segment register != 0 always requires a reload.
		   also reload when it has changed.
		   when prev process used 64bit base always reload
		   to avoid an information leak. */
		if (unlikely(fsindex | next->fsindex | prev->fs)) {
			loadsegment(fs, next->fsindex);
			/* check if the user used a selector != 0
			 * if yes clear 64bit base, since overloaded base
			 * is always mapped to the Null selector
			 */
			if (fsindex)
				prev->fs = 0;
		}
		/* when next process has a 64bit base use it */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		prev->fsindex = fsindex;
	}
	{
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=r" (gsindex));
		if (unlikely(gsindex | next->gsindex | prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

	/*
	 * Switch the PDA context.
	 */
	prev->userrsp = read_pda(oldrsp);
	write_pda(oldrsp, next->userrsp);
	write_pda(pcurrent, next_p);
	write_pda(kernelstack,
		(unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);

	/*
	 * Now maybe reload the debug registers
	 */
	if (unlikely(next->debugreg7)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	/*
	 * Handle the IO bitmap
	 */
	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
		if (next->io_bitmap_ptr)
			/*
			 * Copy the relevant range of the IO bitmap.
			 * Normally this is 128 bytes or less:
			 */
			memcpy(tss->io_bitmap, next->io_bitmap_ptr,
				max(prev->io_bitmap_max, next->io_bitmap_max));
		else {
			/*
			 * Clear any possible leftover bits:
			 */
			memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
		}
	}

	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs regs)
{
	long error;
	char * filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, &regs);
	if (error == 0) {
		task_lock(current);
		current->ptrace &= ~PT_DTRACE;
		task_unlock(current);
	}
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32-bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->rsp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
		    NULL, NULL);
}

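/*
 * get_wchan - find where a sleeping task is blocked.  Walks the saved
 * frame-pointer chain on the task's kernel stack and returns the first
 * return address outside the scheduler, bounded to 16 frames in case
 * the chain is corrupt.
 */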
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, rip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)p->thread_info;
	if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.rsp);
	do {
		if (fp < (unsigned long)stack ||
		    fp > (unsigned long)stack+THREAD_SIZE)
			return 0;
		rip = *(u64 *)(fp+8);
		if (!in_sched_functions(rip))
			return rip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

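/*
 * Implements arch_prctl(2).  A sketch of user-space usage (hypothetical
 * variable names; ARCH_* codes from <asm/prctl.h>, called via
 * syscall(2) if libc provides no wrapper):
 *
 *	unsigned long base;
 *	arch_prctl(ARCH_SET_FS, (unsigned long)tls_block);
 *	arch_prctl(ARCH_GET_FS, (unsigned long)&base);
 */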
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				asm volatile("movl %0,%%fs" :: "r" (0));
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit)
			rdmsrl(MSR_KERNEL_GS_BASE, base);
		else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

/*
 * Capture the user space registers if the task is not running (in user space)
 */
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
	struct pt_regs *pp, ptregs;

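	/* The user-mode pt_regs are saved at the top of the kernel
	   stack, immediately below thread.rsp0. */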
	pp = (struct pt_regs *)(tsk->thread.rsp0);
	--pp;

	ptregs = *pp;
	ptregs.cs &= 0xffff;
	ptregs.ss &= 0xffff;

	elf_core_copy_regs(regs, &ptregs);

	return 1;
}

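/*
 * Randomize the initial user stack pointer by up to 8KB (when
 * randomize_va_space is enabled) while keeping the 16-byte alignment
 * the ABI requires.
 */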
unsigned long arch_align_stack(unsigned long sp)
{
	if (randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}