Blame - kernel/exit.c - android_kernel_lge_hammerhead

blob: 22399caf75743269f1ed9166822264e372b22fbe [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/kernel/exit.c
				3	*
				4	* Copyright (C) 1991, 1992 Linus Torvalds
				5	*/
				6
				7	#include <linux/config.h>
				8	#include <linux/mm.h>
				9	#include <linux/slab.h>
				10	#include <linux/interrupt.h>
				11	#include <linux/smp_lock.h>
				12	#include <linux/module.h>
Randy.Dunlap	c59ede7	2006-01-11 12:17:46 -0800	[diff] [blame]	13	#include <linux/capability.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	14	#include <linux/completion.h>
				15	#include <linux/personality.h>
				16	#include <linux/tty.h>
				17	#include <linux/namespace.h>
				18	#include <linux/key.h>
				19	#include <linux/security.h>
				20	#include <linux/cpu.h>
				21	#include <linux/acct.h>
				22	#include <linux/file.h>
				23	#include <linux/binfmts.h>
				24	#include <linux/ptrace.h>
				25	#include <linux/profile.h>
				26	#include <linux/mount.h>
				27	#include <linux/proc_fs.h>
				28	#include <linux/mempolicy.h>
				29	#include <linux/cpuset.h>
				30	#include <linux/syscalls.h>
Jesper Juhl	7ed20e1	2005-05-01 08:59:14 -0700	[diff] [blame]	31	#include <linux/signal.h>
Oleg Nesterov	6a14c5c	2006-03-28 16:11:18 -0800	[diff] [blame]	32	#include <linux/posix-timers.h>
Matt Helsley	9f46080	2005-11-07 00:59:16 -0800	[diff] [blame]	33	#include <linux/cn_proc.h>
Ingo Molnar	de5097c	2006-01-09 15:59:21 -0800	[diff] [blame]	34	#include <linux/mutex.h>
Ingo Molnar	0771dfe	2006-03-27 01:16:22 -0800	[diff] [blame]	35	#include <linux/futex.h>
Ingo Molnar	34f192c	2006-03-27 01:16:24 -0800	[diff] [blame]	36	#include <linux/compat.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	37
				38	#include <asm/uaccess.h>
				39	#include <asm/unistd.h>
				40	#include <asm/pgtable.h>
				41	#include <asm/mmu_context.h>
				42
				43	extern void sem_exit (void);
				44	extern struct task_struct *child_reaper;
				45
				46	int getrusage(struct task_struct , int, struct rusage __user );
				47
Adrian Bunk	408b664	2005-05-01 08:59:29 -0700	[diff] [blame]	48	static void exit_mm(struct task_struct * tsk);
				49
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	50	static void __unhash_process(struct task_struct *p)
				51	{
				52	nr_threads--;
				53	detach_pid(p, PIDTYPE_PID);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	54	if (thread_group_leader(p)) {
				55	detach_pid(p, PIDTYPE_PGID);
				56	detach_pid(p, PIDTYPE_SID);
Oleg Nesterov	c97d989	2006-03-28 16:11:06 -0800	[diff] [blame]	57
				58	list_del_init(&p->tasks);
Oleg Nesterov	73b9ebf	2006-03-28 16:11:07 -0800	[diff] [blame]	59	__get_cpu_var(process_counts)--;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	60	}
Oleg Nesterov	47e6532	2006-03-28 16:11:25 -0800	[diff] [blame^]	61	list_del_rcu(&p->thread_group);
Oleg Nesterov	c97d989	2006-03-28 16:11:06 -0800	[diff] [blame]	62	remove_parent(p);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	63	}
				64
Oleg Nesterov	6a14c5c	2006-03-28 16:11:18 -0800	[diff] [blame]	65	/*
				66	* This function expects the tasklist_lock write-locked.
				67	*/
				68	static void __exit_signal(struct task_struct *tsk)
				69	{
				70	struct signal_struct *sig = tsk->signal;
				71	struct sighand_struct *sighand;
				72
				73	BUG_ON(!sig);
				74	BUG_ON(!atomic_read(&sig->count));
				75
				76	rcu_read_lock();
				77	sighand = rcu_dereference(tsk->sighand);
				78	spin_lock(&sighand->siglock);
				79
				80	posix_cpu_timers_exit(tsk);
				81	if (atomic_dec_and_test(&sig->count))
				82	posix_cpu_timers_exit_group(tsk);
				83	else {
				84	/*
				85	* If there is any task waiting for the group exit
				86	* then notify it:
				87	*/
				88	if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) {
				89	wake_up_process(sig->group_exit_task);
				90	sig->group_exit_task = NULL;
				91	}
				92	if (tsk == sig->curr_target)
				93	sig->curr_target = next_thread(tsk);
				94	/*
				95	* Accumulate here the counters for all threads but the
				96	* group leader as they die, so they can be added into
				97	* the process-wide totals when those are taken.
				98	* The group leader stays around as a zombie as long
				99	* as there are other threads. When it gets reaped,
				100	* the exit.c code will add its counts into these totals.
				101	* We won't ever get here for the group leader, since it
				102	* will have been the last reference on the signal_struct.
				103	*/
				104	sig->utime = cputime_add(sig->utime, tsk->utime);
				105	sig->stime = cputime_add(sig->stime, tsk->stime);
				106	sig->min_flt += tsk->min_flt;
				107	sig->maj_flt += tsk->maj_flt;
				108	sig->nvcsw += tsk->nvcsw;
				109	sig->nivcsw += tsk->nivcsw;
				110	sig->sched_time += tsk->sched_time;
				111	sig = NULL; /* Marker for below. */
				112	}
				113
Oleg Nesterov	5876700	2006-03-28 16:11:20 -0800	[diff] [blame]	114	__unhash_process(tsk);
				115
Oleg Nesterov	6a14c5c	2006-03-28 16:11:18 -0800	[diff] [blame]	116	tsk->signal = NULL;
				117	cleanup_sighand(tsk);
				118	spin_unlock(&sighand->siglock);
				119	rcu_read_unlock();
				120
				121	clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
				122	flush_sigqueue(&tsk->pending);
				123	if (sig) {
				124	flush_sigqueue(&sig->shared_pending);
				125	__cleanup_signal(sig);
				126	}
				127	}
				128
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	129	void release_task(struct task_struct * p)
				130	{
				131	int zap_leader;
				132	task_t *leader;
				133	struct dentry *proc_dentry;
				134
Oleg Nesterov	1f09f97	2006-03-28 16:11:11 -0800	[diff] [blame]	135	repeat:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	136	atomic_dec(&p->user->processes);
				137	spin_lock(&p->proc_lock);
				138	proc_dentry = proc_pid_unhash(p);
				139	write_lock_irq(&tasklist_lock);
Oleg Nesterov	1f09f97	2006-03-28 16:11:11 -0800	[diff] [blame]	140	ptrace_unlink(p);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	141	BUG_ON(!list_empty(&p->ptrace_list) \|\| !list_empty(&p->ptrace_children));
				142	__exit_signal(p);
Oleg Nesterov	35f5cad	2006-03-28 16:11:19 -0800	[diff] [blame]	143
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	144	/*
				145	* If we are the last non-leader member of the thread
				146	* group, and the leader is zombie, then notify the
				147	* group leader's parent process. (if it wants notification.)
				148	*/
				149	zap_leader = 0;
				150	leader = p->group_leader;
				151	if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
				152	BUG_ON(leader->exit_signal == -1);
				153	do_notify_parent(leader, leader->exit_signal);
				154	/*
				155	* If we were the last child thread and the leader has
				156	* exited already, and the leader's parent ignores SIGCHLD,
				157	* then we are the one who should release the leader.
				158	*
				159	* do_notify_parent() will have marked it self-reaping in
				160	* that case.
				161	*/
				162	zap_leader = (leader->exit_signal == -1);
				163	}
				164
				165	sched_exit(p);
				166	write_unlock_irq(&tasklist_lock);
				167	spin_unlock(&p->proc_lock);
				168	proc_pid_flush(proc_dentry);
				169	release_thread(p);
				170	put_task_struct(p);
				171
				172	p = leader;
				173	if (unlikely(zap_leader))
				174	goto repeat;
				175	}
				176
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	177	/*
				178	* This checks not only the pgrp, but falls back on the pid if no
				179	* satisfactory pgrp is found. I dunno - gdb doesn't work correctly
				180	* without this...
				181	*/
				182	int session_of_pgrp(int pgrp)
				183	{
				184	struct task_struct *p;
				185	int sid = -1;
				186
				187	read_lock(&tasklist_lock);
				188	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
				189	if (p->signal->session > 0) {
				190	sid = p->signal->session;
				191	goto out;
				192	}
				193	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
				194	p = find_task_by_pid(pgrp);
				195	if (p)
				196	sid = p->signal->session;
				197	out:
				198	read_unlock(&tasklist_lock);
				199
				200	return sid;
				201	}
				202
				203	/*
				204	* Determine if a process group is "orphaned", according to the POSIX
				205	* definition in 2.2.2.52. Orphaned process groups are not to be affected
				206	* by terminal-generated stop signals. Newly orphaned process groups are
				207	* to receive a SIGHUP and a SIGCONT.
				208	*
				209	* "I ask you, have you ever known what it is to be an orphan?"
				210	*/
				211	static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
				212	{
				213	struct task_struct *p;
				214	int ret = 1;
				215
				216	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
				217	if (p == ignored_task
				218	\|\| p->exit_state
				219	\|\| p->real_parent->pid == 1)
				220	continue;
				221	if (process_group(p->real_parent) != pgrp
				222	&& p->real_parent->signal->session == p->signal->session) {
				223	ret = 0;
				224	break;
				225	}
				226	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
				227	return ret; /* (sighing) "Often!" */
				228	}
				229
				230	int is_orphaned_pgrp(int pgrp)
				231	{
				232	int retval;
				233
				234	read_lock(&tasklist_lock);
				235	retval = will_become_orphaned_pgrp(pgrp, NULL);
				236	read_unlock(&tasklist_lock);
				237
				238	return retval;
				239	}
				240
Arjan van de Ven	858119e	2006-01-14 13:20:43 -0800	[diff] [blame]	241	static int has_stopped_jobs(int pgrp)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	242	{
				243	int retval = 0;
				244	struct task_struct *p;
				245
				246	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
				247	if (p->state != TASK_STOPPED)
				248	continue;
				249
				250	/* If p is stopped by a debugger on a signal that won't
				251	stop it, then don't count p as stopped. This isn't
				252	perfect but it's a good approximation. */
				253	if (unlikely (p->ptrace)
				254	&& p->exit_code != SIGSTOP
				255	&& p->exit_code != SIGTSTP
				256	&& p->exit_code != SIGTTOU
				257	&& p->exit_code != SIGTTIN)
				258	continue;
				259
				260	retval = 1;
				261	break;
				262	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
				263	return retval;
				264	}
				265
				266	/**
Pavel Pisa	4dc3b16	2005-05-01 08:59:25 -0700	[diff] [blame]	267	* reparent_to_init - Reparent the calling kernel thread to the init task.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	268	*
				269	* If a kernel thread is launched as a result of a system call, or if
				270	* it ever exits, it should generally reparent itself to init so that
				271	* it is correctly cleaned up on exit.
				272	*
				273	* The various task state such as scheduling policy and priority may have
				274	* been inherited from a user process, so we reset them to sane values here.
				275	*
				276	* NOTE that reparent_to_init() gives the caller full capabilities.
				277	*/
Arjan van de Ven	858119e	2006-01-14 13:20:43 -0800	[diff] [blame]	278	static void reparent_to_init(void)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	279	{
				280	write_lock_irq(&tasklist_lock);
				281
				282	ptrace_unlink(current);
				283	/* Reparent to init */
Oleg Nesterov	9b678ec	2006-03-28 16:11:05 -0800	[diff] [blame]	284	remove_parent(current);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	285	current->parent = child_reaper;
				286	current->real_parent = child_reaper;
Oleg Nesterov	9b678ec	2006-03-28 16:11:05 -0800	[diff] [blame]	287	add_parent(current);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	288
				289	/* Set the exit signal to SIGCHLD so we signal init on exit */
				290	current->exit_signal = SIGCHLD;
				291
Ingo Molnar	b0a9499	2006-01-14 13:20:41 -0800	[diff] [blame]	292	if ((current->policy == SCHED_NORMAL \|\|
				293	current->policy == SCHED_BATCH)
				294	&& (task_nice(current) < 0))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	295	set_user_nice(current, 0);
				296	/* cpus_allowed? */
				297	/* rt_priority? */
				298	/* signals? */
				299	security_task_reparent_to_init(current);
				300	memcpy(current->signal->rlim, init_task.signal->rlim,
				301	sizeof(current->signal->rlim));
				302	atomic_inc(&(INIT_USER->__count));
				303	write_unlock_irq(&tasklist_lock);
				304	switch_uid(INIT_USER);
				305	}
				306
				307	void __set_special_pids(pid_t session, pid_t pgrp)
				308	{
Oren Laadan	e19f247	2006-01-08 01:03:58 -0800	[diff] [blame]	309	struct task_struct *curr = current->group_leader;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	310
				311	if (curr->signal->session != session) {
				312	detach_pid(curr, PIDTYPE_SID);
				313	curr->signal->session = session;
				314	attach_pid(curr, PIDTYPE_SID, session);
				315	}
				316	if (process_group(curr) != pgrp) {
				317	detach_pid(curr, PIDTYPE_PGID);
				318	curr->signal->pgrp = pgrp;
				319	attach_pid(curr, PIDTYPE_PGID, pgrp);
				320	}
				321	}
				322
				323	void set_special_pids(pid_t session, pid_t pgrp)
				324	{
				325	write_lock_irq(&tasklist_lock);
				326	__set_special_pids(session, pgrp);
				327	write_unlock_irq(&tasklist_lock);
				328	}
				329
				330	/*
				331	* Let kernel threads use this to say that they
				332	* allow a certain signal (since daemonize() will
				333	* have disabled all of them by default).
				334	*/
				335	int allow_signal(int sig)
				336	{
Jesper Juhl	7ed20e1	2005-05-01 08:59:14 -0700	[diff] [blame]	337	if (!valid_signal(sig) \|\| sig < 1)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	338	return -EINVAL;
				339
				340	spin_lock_irq(&current->sighand->siglock);
				341	sigdelset(&current->blocked, sig);
				342	if (!current->mm) {
				343	/* Kernel threads handle their own signals.
				344	Let the signal code know it'll be handled, so
				345	that they don't get converted to SIGKILL or
				346	just silently dropped */
				347	current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
				348	}
				349	recalc_sigpending();
				350	spin_unlock_irq(&current->sighand->siglock);
				351	return 0;
				352	}
				353
				354	EXPORT_SYMBOL(allow_signal);
				355
				356	int disallow_signal(int sig)
				357	{
Jesper Juhl	7ed20e1	2005-05-01 08:59:14 -0700	[diff] [blame]	358	if (!valid_signal(sig) \|\| sig < 1)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	359	return -EINVAL;
				360
				361	spin_lock_irq(&current->sighand->siglock);
				362	sigaddset(&current->blocked, sig);
				363	recalc_sigpending();
				364	spin_unlock_irq(&current->sighand->siglock);
				365	return 0;
				366	}
				367
				368	EXPORT_SYMBOL(disallow_signal);
				369
				370	/*
				371	* Put all the gunge required to become a kernel thread without
				372	* attached user resources in one place where it belongs.
				373	*/
				374
				375	void daemonize(const char *name, ...)
				376	{
				377	va_list args;
				378	struct fs_struct *fs;
				379	sigset_t blocked;
				380
				381	va_start(args, name);
				382	vsnprintf(current->comm, sizeof(current->comm), name, args);
				383	va_end(args);
				384
				385	/*
				386	* If we were started as result of loading a module, close all of the
				387	* user space pages. We don't need them, and if we didn't close them
				388	* they would be locked into memory.
				389	*/
				390	exit_mm(current);
				391
				392	set_special_pids(1, 1);
Ingo Molnar	70522e1	2006-03-23 03:00:31 -0800	[diff] [blame]	393	mutex_lock(&tty_mutex);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	394	current->signal->tty = NULL;
Ingo Molnar	70522e1	2006-03-23 03:00:31 -0800	[diff] [blame]	395	mutex_unlock(&tty_mutex);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	396
				397	/* Block and flush all signals */
				398	sigfillset(&blocked);
				399	sigprocmask(SIG_BLOCK, &blocked, NULL);
				400	flush_signals(current);
				401
				402	/* Become as one with the init task */
				403
				404	exit_fs(current); /* current->fs->count--; */
				405	fs = init_task.fs;
				406	current->fs = fs;
				407	atomic_inc(&fs->count);
Björn Steinbrink	5914811	2006-02-18 18:12:43 +0100	[diff] [blame]	408	exit_namespace(current);
				409	current->namespace = init_task.namespace;
				410	get_namespace(current->namespace);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	411	exit_files(current);
				412	current->files = init_task.files;
				413	atomic_inc(&current->files->count);
				414
				415	reparent_to_init();
				416	}
				417
				418	EXPORT_SYMBOL(daemonize);
				419
Arjan van de Ven	858119e	2006-01-14 13:20:43 -0800	[diff] [blame]	420	static void close_files(struct files_struct * files)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	421	{
				422	int i, j;
Dipankar Sarma	badf166	2005-09-09 13:04:10 -0700	[diff] [blame]	423	struct fdtable *fdt;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	424
				425	j = 0;
Dipankar Sarma	4fb3a53	2005-09-16 19:28:13 -0700	[diff] [blame]	426
				427	/*
				428	* It is safe to dereference the fd table without RCU or
				429	* ->file_lock because this is the last reference to the
				430	* files structure.
				431	*/
Dipankar Sarma	badf166	2005-09-09 13:04:10 -0700	[diff] [blame]	432	fdt = files_fdtable(files);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	433	for (;;) {
				434	unsigned long set;
				435	i = j * __NFDBITS;
Dipankar Sarma	badf166	2005-09-09 13:04:10 -0700	[diff] [blame]	436	if (i >= fdt->max_fdset \|\| i >= fdt->max_fds)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	437	break;
Dipankar Sarma	badf166	2005-09-09 13:04:10 -0700	[diff] [blame]	438	set = fdt->open_fds->fds_bits[j++];
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	439	while (set) {
				440	if (set & 1) {
Dipankar Sarma	badf166	2005-09-09 13:04:10 -0700	[diff] [blame]	441	struct file * file = xchg(&fdt->fd[i], NULL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	442	if (file)
				443	filp_close(file, files);
				444	}
				445	i++;
				446	set >>= 1;
				447	}
				448	}
				449	}
				450
				451	struct files_struct get_files_struct(struct task_struct task)
				452	{
				453	struct files_struct *files;
				454
				455	task_lock(task);
				456	files = task->files;
				457	if (files)
				458	atomic_inc(&files->count);
				459	task_unlock(task);
				460
				461	return files;
				462	}
				463
				464	void fastcall put_files_struct(struct files_struct *files)
				465	{
Dipankar Sarma	badf166	2005-09-09 13:04:10 -0700	[diff] [blame]	466	struct fdtable *fdt;
				467
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	468	if (atomic_dec_and_test(&files->count)) {
				469	close_files(files);
				470	/*
				471	* Free the fd and fdset arrays if we expanded them.
Dipankar Sarma	ab2af1f	2005-09-09 13:04:13 -0700	[diff] [blame]	472	* If the fdtable was embedded, pass files for freeing
				473	* at the end of the RCU grace period. Otherwise,
				474	* you can free files immediately.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	475	*/
Dipankar Sarma	badf166	2005-09-09 13:04:10 -0700	[diff] [blame]	476	fdt = files_fdtable(files);
Dipankar Sarma	ab2af1f	2005-09-09 13:04:13 -0700	[diff] [blame]	477	if (fdt == &files->fdtab)
				478	fdt->free_files = files;
				479	else
				480	kmem_cache_free(files_cachep, files);
				481	free_fdtable(fdt);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	482	}
				483	}
				484
				485	EXPORT_SYMBOL(put_files_struct);
				486
				487	static inline void __exit_files(struct task_struct *tsk)
				488	{
				489	struct files_struct * files = tsk->files;
				490
				491	if (files) {
				492	task_lock(tsk);
				493	tsk->files = NULL;
				494	task_unlock(tsk);
				495	put_files_struct(files);
				496	}
				497	}
				498
				499	void exit_files(struct task_struct *tsk)
				500	{
				501	__exit_files(tsk);
				502	}
				503
				504	static inline void __put_fs_struct(struct fs_struct *fs)
				505	{
				506	/* No need to hold fs->lock if we are killing it */
				507	if (atomic_dec_and_test(&fs->count)) {
				508	dput(fs->root);
				509	mntput(fs->rootmnt);
				510	dput(fs->pwd);
				511	mntput(fs->pwdmnt);
				512	if (fs->altroot) {
				513	dput(fs->altroot);
				514	mntput(fs->altrootmnt);
				515	}
				516	kmem_cache_free(fs_cachep, fs);
				517	}
				518	}
				519
				520	void put_fs_struct(struct fs_struct *fs)
				521	{
				522	__put_fs_struct(fs);
				523	}
				524
				525	static inline void __exit_fs(struct task_struct *tsk)
				526	{
				527	struct fs_struct * fs = tsk->fs;
				528
				529	if (fs) {
				530	task_lock(tsk);
				531	tsk->fs = NULL;
				532	task_unlock(tsk);
				533	__put_fs_struct(fs);
				534	}
				535	}
				536
				537	void exit_fs(struct task_struct *tsk)
				538	{
				539	__exit_fs(tsk);
				540	}
				541
				542	EXPORT_SYMBOL_GPL(exit_fs);
				543
				544	/*
				545	* Turn us into a lazy TLB process if we
				546	* aren't already..
				547	*/
Adrian Bunk	408b664	2005-05-01 08:59:29 -0700	[diff] [blame]	548	static void exit_mm(struct task_struct * tsk)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	549	{
				550	struct mm_struct *mm = tsk->mm;
				551
				552	mm_release(tsk, mm);
				553	if (!mm)
				554	return;
				555	/*
				556	* Serialize with any possible pending coredump.
				557	* We must hold mmap_sem around checking core_waiters
				558	* and clearing tsk->mm. The core-inducing thread
				559	* will increment core_waiters for each thread in the
				560	* group with ->mm != NULL.
				561	*/
				562	down_read(&mm->mmap_sem);
				563	if (mm->core_waiters) {
				564	up_read(&mm->mmap_sem);
				565	down_write(&mm->mmap_sem);
				566	if (!--mm->core_waiters)
				567	complete(mm->core_startup_done);
				568	up_write(&mm->mmap_sem);
				569
				570	wait_for_completion(&mm->core_done);
				571	down_read(&mm->mmap_sem);
				572	}
				573	atomic_inc(&mm->mm_count);
				574	if (mm != tsk->active_mm) BUG();
				575	/* more a memory barrier than a real lock */
				576	task_lock(tsk);
				577	tsk->mm = NULL;
				578	up_read(&mm->mmap_sem);
				579	enter_lazy_tlb(mm, current);
				580	task_unlock(tsk);
				581	mmput(mm);
				582	}
				583
Oleg Nesterov	d799f03	2006-03-28 16:11:04 -0800	[diff] [blame]	584	static inline void choose_new_parent(task_t p, task_t reaper)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	585	{
				586	/*
				587	* Make sure we're not reparenting to ourselves and that
				588	* the parent is not a zombie.
				589	*/
Oleg Nesterov	d799f03	2006-03-28 16:11:04 -0800	[diff] [blame]	590	BUG_ON(p == reaper \|\| reaper->exit_state);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	591	p->real_parent = reaper;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	592	}
				593
Arjan van de Ven	858119e	2006-01-14 13:20:43 -0800	[diff] [blame]	594	static void reparent_thread(task_t p, task_t father, int traced)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	595	{
				596	/* We don't want people slaying init. */
				597	if (p->exit_signal != -1)
				598	p->exit_signal = SIGCHLD;
				599
				600	if (p->pdeath_signal)
				601	/* We already hold the tasklist_lock here. */
Oleg Nesterov	b67a1b9	2005-10-30 15:03:44 -0800	[diff] [blame]	602	group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	603
				604	/* Move the child from its dying parent to the new one. */
				605	if (unlikely(traced)) {
				606	/* Preserve ptrace links if someone else is tracing this child. */
				607	list_del_init(&p->ptrace_list);
				608	if (p->parent != p->real_parent)
				609	list_add(&p->ptrace_list, &p->real_parent->ptrace_children);
				610	} else {
				611	/* If this child is being traced, then we're the one tracing it
				612	* anyway, so let go of it.
				613	*/
				614	p->ptrace = 0;
Oleg Nesterov	6ac781b	2006-03-28 16:11:09 -0800	[diff] [blame]	615	remove_parent(p);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	616	p->parent = p->real_parent;
Oleg Nesterov	6ac781b	2006-03-28 16:11:09 -0800	[diff] [blame]	617	add_parent(p);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	618
				619	/* If we'd notified the old parent about this child's death,
				620	* also notify the new parent.
				621	*/
				622	if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 &&
				623	thread_group_empty(p))
				624	do_notify_parent(p, p->exit_signal);
				625	else if (p->state == TASK_TRACED) {
				626	/*
				627	* If it was at a trace stop, turn it into
				628	* a normal stop since it's no longer being
				629	* traced.
				630	*/
				631	ptrace_untrace(p);
				632	}
				633	}
				634
				635	/*
				636	* process group orphan check
				637	* Case ii: Our child is in a different pgrp
				638	* than we are, and it was the only connection
				639	* outside, so the child pgrp is now orphaned.
				640	*/
				641	if ((process_group(p) != process_group(father)) &&
				642	(p->signal->session == father->signal->session)) {
				643	int pgrp = process_group(p);
				644
				645	if (will_become_orphaned_pgrp(pgrp, NULL) && has_stopped_jobs(pgrp)) {
Oleg Nesterov	b67a1b9	2005-10-30 15:03:44 -0800	[diff] [blame]	646	__kill_pg_info(SIGHUP, SEND_SIG_PRIV, pgrp);
				647	__kill_pg_info(SIGCONT, SEND_SIG_PRIV, pgrp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	648	}
				649	}
				650	}
				651
				652	/*
				653	* When we die, we re-parent all our children.
				654	* Try to give them to another thread in our thread
				655	* group, and if no such member exists, give it to
				656	* the global child reaper process (ie "init")
				657	*/
Arjan van de Ven	858119e	2006-01-14 13:20:43 -0800	[diff] [blame]	658	static void forget_original_parent(struct task_struct * father,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	659	struct list_head *to_release)
				660	{
				661	struct task_struct p, reaper = father;
				662	struct list_head _p, _n;
				663
				664	do {
				665	reaper = next_thread(reaper);
				666	if (reaper == father) {
				667	reaper = child_reaper;
				668	break;
				669	}
				670	} while (reaper->exit_state);
				671
				672	/*
				673	* There are only two places where our children can be:
				674	*
				675	* - in our child list
				676	* - in our ptraced child list
				677	*
				678	* Search them and reparent children.
				679	*/
				680	list_for_each_safe(_p, _n, &father->children) {
				681	int ptrace;
				682	p = list_entry(_p,struct task_struct,sibling);
				683
				684	ptrace = p->ptrace;
				685
				686	/* if father isn't the real parent, then ptrace must be enabled */
				687	BUG_ON(father != p->real_parent && !ptrace);
				688
				689	if (father == p->real_parent) {
				690	/* reparent with a reaper, real father it's us */
Oleg Nesterov	d799f03	2006-03-28 16:11:04 -0800	[diff] [blame]	691	choose_new_parent(p, reaper);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	692	reparent_thread(p, father, 0);
				693	} else {
				694	/* reparent ptraced task to its real parent */
				695	__ptrace_unlink (p);
				696	if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 &&
				697	thread_group_empty(p))
				698	do_notify_parent(p, p->exit_signal);
				699	}
				700
				701	/*
				702	* if the ptraced child is a zombie with exit_signal == -1
				703	* we must collect it before we exit, or it will remain
				704	* zombie forever since we prevented it from self-reap itself
				705	* while it was being traced by us, to be able to see it in wait4.
				706	*/
				707	if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && p->exit_signal == -1))
				708	list_add(&p->ptrace_list, to_release);
				709	}
				710	list_for_each_safe(_p, _n, &father->ptrace_children) {
				711	p = list_entry(_p,struct task_struct,ptrace_list);
Oleg Nesterov	d799f03	2006-03-28 16:11:04 -0800	[diff] [blame]	712	choose_new_parent(p, reaper);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	713	reparent_thread(p, father, 1);
				714	}
				715	}
				716
				717	/*
				718	* Send signals to all our closest relatives so that they know
				719	* to properly mourn us..
				720	*/
				721	static void exit_notify(struct task_struct *tsk)
				722	{
				723	int state;
				724	struct task_struct *t;
				725	struct list_head ptrace_dead, _p, _n;
				726
				727	if (signal_pending(tsk) && !(tsk->signal->flags & SIGNAL_GROUP_EXIT)
				728	&& !thread_group_empty(tsk)) {
				729	/*
				730	* This occurs when there was a race between our exit
				731	* syscall and a group signal choosing us as the one to
				732	* wake up. It could be that we are the only thread
				733	* alerted to check for pending signals, but another thread
				734	* should be woken now to take the signal since we will not.
				735	* Now we'll wake all the threads in the group just to make
				736	* sure someone gets all the pending signals.
				737	*/
				738	read_lock(&tasklist_lock);
				739	spin_lock_irq(&tsk->sighand->siglock);
				740	for (t = next_thread(tsk); t != tsk; t = next_thread(t))
				741	if (!signal_pending(t) && !(t->flags & PF_EXITING)) {
				742	recalc_sigpending_tsk(t);
				743	if (signal_pending(t))
				744	signal_wake_up(t, 0);
				745	}
				746	spin_unlock_irq(&tsk->sighand->siglock);
				747	read_unlock(&tasklist_lock);
				748	}
				749
				750	write_lock_irq(&tasklist_lock);
				751
				752	/*
				753	* This does two things:
				754	*
				755	* A. Make init inherit all the child processes
				756	* B. Check to see if any process groups have become orphaned
				757	* as a result of our exiting, and if they have any stopped
				758	* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
				759	*/
				760
				761	INIT_LIST_HEAD(&ptrace_dead);
				762	forget_original_parent(tsk, &ptrace_dead);
				763	BUG_ON(!list_empty(&tsk->children));
				764	BUG_ON(!list_empty(&tsk->ptrace_children));
				765
				766	/*
				767	* Check to see if any process groups have become orphaned
				768	* as a result of our exiting, and if they have any stopped
				769	* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
				770	*
				771	* Case i: Our father is in a different pgrp than we are
				772	* and we were the only connection outside, so our pgrp
				773	* is about to become orphaned.
				774	*/
				775
				776	t = tsk->real_parent;
				777
				778	if ((process_group(t) != process_group(tsk)) &&
				779	(t->signal->session == tsk->signal->session) &&
				780	will_become_orphaned_pgrp(process_group(tsk), tsk) &&
				781	has_stopped_jobs(process_group(tsk))) {
Oleg Nesterov	b67a1b9	2005-10-30 15:03:44 -0800	[diff] [blame]	782	__kill_pg_info(SIGHUP, SEND_SIG_PRIV, process_group(tsk));
				783	__kill_pg_info(SIGCONT, SEND_SIG_PRIV, process_group(tsk));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	784	}
				785
				786	/* Let father know we died
				787	*
				788	* Thread signals are configurable, but you aren't going to use
				789	* that to send signals to arbitary processes.
				790	* That stops right now.
				791	*
				792	* If the parent exec id doesn't match the exec id we saved
				793	* when we started then we know the parent has changed security
				794	* domain.
				795	*
				796	* If our self_exec id doesn't match our parent_exec_id then
				797	* we have changed execution domain as these two values started
				798	* the same after a fork.
				799	*
				800	*/
				801
				802	if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 &&
				803	( tsk->parent_exec_id != t->self_exec_id \|\|
				804	tsk->self_exec_id != tsk->parent_exec_id)
				805	&& !capable(CAP_KILL))
				806	tsk->exit_signal = SIGCHLD;
				807
				808
				809	/* If something other than our normal parent is ptracing us, then
				810	* send it a SIGCHLD instead of honoring exit_signal. exit_signal
				811	* only has special meaning to our real parent.
				812	*/
				813	if (tsk->exit_signal != -1 && thread_group_empty(tsk)) {
				814	int signal = tsk->parent == tsk->real_parent ? tsk->exit_signal : SIGCHLD;
				815	do_notify_parent(tsk, signal);
				816	} else if (tsk->ptrace) {
				817	do_notify_parent(tsk, SIGCHLD);
				818	}
				819
				820	state = EXIT_ZOMBIE;
				821	if (tsk->exit_signal == -1 &&
				822	(likely(tsk->ptrace == 0) \|\|
				823	unlikely(tsk->parent->signal->flags & SIGNAL_GROUP_EXIT)))
				824	state = EXIT_DEAD;
				825	tsk->exit_state = state;
				826
				827	write_unlock_irq(&tasklist_lock);
				828
				829	list_for_each_safe(_p, _n, &ptrace_dead) {
				830	list_del_init(_p);
				831	t = list_entry(_p,struct task_struct,ptrace_list);
				832	release_task(t);
				833	}
				834
				835	/* If the process is dead, release it - nobody will wait for it */
				836	if (state == EXIT_DEAD)
				837	release_task(tsk);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	838	}
				839
				840	fastcall NORET_TYPE void do_exit(long code)
				841	{
				842	struct task_struct *tsk = current;
				843	int group_dead;
				844
				845	profile_task_exit(tsk);
				846
Jens Axboe	22e2c50	2005-06-27 10:55:12 +0200	[diff] [blame]	847	WARN_ON(atomic_read(&tsk->fs_excl));
				848
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	849	if (unlikely(in_interrupt()))
				850	panic("Aiee, killing interrupt handler!");
				851	if (unlikely(!tsk->pid))
				852	panic("Attempted to kill the idle task!");
Eric W. Biederman	fef23e7	2006-03-28 16:10:58 -0800	[diff] [blame]	853	if (unlikely(tsk == child_reaper))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	854	panic("Attempted to kill init!");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	855
				856	if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
				857	current->ptrace_message = code;
				858	ptrace_notify((PTRACE_EVENT_EXIT << 8) \| SIGTRAP);
				859	}
				860
Alexander Nyberg	df164db	2005-06-23 00:09:13 -0700	[diff] [blame]	861	/*
				862	* We're taking recursive faults here in do_exit. Safest is to just
				863	* leave this task alone and wait for reboot.
				864	*/
				865	if (unlikely(tsk->flags & PF_EXITING)) {
				866	printk(KERN_ALERT
				867	"Fixing recursive fault but reboot is needed!\n");
Al Viro	afc847b	2006-02-28 12:51:55 -0500	[diff] [blame]	868	if (tsk->io_context)
				869	exit_io_context();
Alexander Nyberg	df164db	2005-06-23 00:09:13 -0700	[diff] [blame]	870	set_current_state(TASK_UNINTERRUPTIBLE);
				871	schedule();
				872	}
				873
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	874	tsk->flags \|= PF_EXITING;
				875
Linus Torvalds	a362f46	2005-10-27 09:07:33 -0700	[diff] [blame]	876	/*
				877	* Make sure we don't try to process any timer firings
				878	* while we are already exiting.
				879	*/
				880	tsk->it_virt_expires = cputime_zero;
				881	tsk->it_prof_expires = cputime_zero;
				882	tsk->it_sched_expires = 0;
				883
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	884	if (unlikely(in_atomic()))
				885	printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
				886	current->comm, current->pid,
				887	preempt_count());
				888
				889	acct_update_integrals(tsk);
Hugh Dickins	365e9c87	2005-10-29 18:16:18 -0700	[diff] [blame]	890	if (tsk->mm) {
				891	update_hiwater_rss(tsk->mm);
				892	update_hiwater_vm(tsk->mm);
				893	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	894	group_dead = atomic_dec_and_test(&tsk->signal->live);
Andrew Morton	c306895	2005-08-04 16:49:32 -0700	[diff] [blame]	895	if (group_dead) {
Thomas Gleixner	2ff678b	2006-01-09 20:52:34 -0800	[diff] [blame]	896	hrtimer_cancel(&tsk->signal->real_timer);
Roland McGrath	25f407f	2005-10-21 15:03:29 -0700	[diff] [blame]	897	exit_itimers(tsk->signal);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	898	acct_process(code);
Andrew Morton	c306895	2005-08-04 16:49:32 -0700	[diff] [blame]	899	}
Ingo Molnar	0771dfe	2006-03-27 01:16:22 -0800	[diff] [blame]	900	if (unlikely(tsk->robust_list))
				901	exit_robust_list(tsk);
Ingo Molnar	34f192c	2006-03-27 01:16:24 -0800	[diff] [blame]	902	#ifdef CONFIG_COMPAT
				903	if (unlikely(tsk->compat_robust_list))
				904	compat_exit_robust_list(tsk);
				905	#endif
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	906	exit_mm(tsk);
				907
				908	exit_sem(tsk);
				909	__exit_files(tsk);
				910	__exit_fs(tsk);
				911	exit_namespace(tsk);
				912	exit_thread();
				913	cpuset_exit(tsk);
				914	exit_keys(tsk);
				915
				916	if (group_dead && tsk->signal->leader)
				917	disassociate_ctty(1);
				918
Al Viro	a1261f5	2005-11-13 16:06:55 -0800	[diff] [blame]	919	module_put(task_thread_info(tsk)->exec_domain->module);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	920	if (tsk->binfmt)
				921	module_put(tsk->binfmt->module);
				922
				923	tsk->exit_code = code;
Matt Helsley	9f46080	2005-11-07 00:59:16 -0800	[diff] [blame]	924	proc_exit_connector(tsk);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	925	exit_notify(tsk);
				926	#ifdef CONFIG_NUMA
				927	mpol_free(tsk->mempolicy);
				928	tsk->mempolicy = NULL;
				929	#endif
Ingo Molnar	de5097c	2006-01-09 15:59:21 -0800	[diff] [blame]	930	/*
				931	* If DEBUG_MUTEXES is on, make sure we are holding no locks:
				932	*/
				933	mutex_debug_check_no_locks_held(tsk);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	934
Al Viro	afc847b	2006-02-28 12:51:55 -0500	[diff] [blame]	935	if (tsk->io_context)
				936	exit_io_context();
				937
Coywolf Qi Hunt	7407251	2005-10-30 15:02:47 -0800	[diff] [blame]	938	/* PF_DEAD causes final put_task_struct after we schedule. */
				939	preempt_disable();
				940	BUG_ON(tsk->flags & PF_DEAD);
				941	tsk->flags \|= PF_DEAD;
				942
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	943	schedule();
				944	BUG();
				945	/* Avoid "noreturn function does return". */
				946	for (;;) ;
				947	}
				948
Russ Anderson	012914d	2005-04-23 00:08:00 -0700	[diff] [blame]	949	EXPORT_SYMBOL_GPL(do_exit);
				950
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	951	NORET_TYPE void complete_and_exit(struct completion *comp, long code)
				952	{
				953	if (comp)
				954	complete(comp);
				955
				956	do_exit(code);
				957	}
				958
				959	EXPORT_SYMBOL(complete_and_exit);
				960
				961	asmlinkage long sys_exit(int error_code)
				962	{
				963	do_exit((error_code&0xff)<<8);
				964	}
				965
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	966	/*
				967	* Take down every thread in the group. This is called by fatal signals
				968	* as well as by sys_exit_group (below).
				969	*/
				970	NORET_TYPE void
				971	do_group_exit(int exit_code)
				972	{
				973	BUG_ON(exit_code & 0x80); /* core dumps don't get here */
				974
				975	if (current->signal->flags & SIGNAL_GROUP_EXIT)
				976	exit_code = current->signal->group_exit_code;
				977	else if (!thread_group_empty(current)) {
				978	struct signal_struct *const sig = current->signal;
				979	struct sighand_struct *const sighand = current->sighand;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	980	spin_lock_irq(&sighand->siglock);
				981	if (sig->flags & SIGNAL_GROUP_EXIT)
				982	/* Another thread got here before we took the lock. */
				983	exit_code = sig->group_exit_code;
				984	else {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	985	sig->group_exit_code = exit_code;
				986	zap_other_threads(current);
				987	}
				988	spin_unlock_irq(&sighand->siglock);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	989	}
				990
				991	do_exit(exit_code);
				992	/* NOTREACHED */
				993	}
				994
				995	/*
				996	* this kills every thread in the thread group. Note that any externally
				997	* wait4()-ing process will get the correct exit code - even if this
				998	* thread is not the thread group leader.
				999	*/
				1000	asmlinkage void sys_exit_group(int error_code)
				1001	{
				1002	do_group_exit((error_code & 0xff) << 8);
				1003	}
				1004
				1005	static int eligible_child(pid_t pid, int options, task_t *p)
				1006	{
				1007	if (pid > 0) {
				1008	if (p->pid != pid)
				1009	return 0;
				1010	} else if (!pid) {
				1011	if (process_group(p) != process_group(current))
				1012	return 0;
				1013	} else if (pid != -1) {
				1014	if (process_group(p) != -pid)
				1015	return 0;
				1016	}
				1017
				1018	/*
				1019	* Do not consider detached threads that are
				1020	* not ptraced:
				1021	*/
				1022	if (p->exit_signal == -1 && !p->ptrace)
				1023	return 0;
				1024
				1025	/* Wait for all children (clone and not) if __WALL is set;
				1026	* otherwise, wait for clone children only if __WCLONE is
				1027	* set; otherwise, wait for non-clone children only. (Note:
				1028	* A "clone" child here is one that reports to its parent
				1029	* using a signal other than SIGCHLD.) */
				1030	if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
				1031	&& !(options & __WALL))
				1032	return 0;
				1033	/*
				1034	* Do not consider thread group leaders that are
				1035	* in a non-empty thread group:
				1036	*/
				1037	if (current->tgid != p->tgid && delay_group_leader(p))
				1038	return 2;
				1039
				1040	if (security_task_wait(p))
				1041	return 0;
				1042
				1043	return 1;
				1044	}
				1045
				1046	static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid,
				1047	int why, int status,
				1048	struct siginfo __user *infop,
				1049	struct rusage __user *rusagep)
				1050	{
				1051	int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0;
				1052	put_task_struct(p);
				1053	if (!retval)
				1054	retval = put_user(SIGCHLD, &infop->si_signo);
				1055	if (!retval)
				1056	retval = put_user(0, &infop->si_errno);
				1057	if (!retval)
				1058	retval = put_user((short)why, &infop->si_code);
				1059	if (!retval)
				1060	retval = put_user(pid, &infop->si_pid);
				1061	if (!retval)
				1062	retval = put_user(uid, &infop->si_uid);
				1063	if (!retval)
				1064	retval = put_user(status, &infop->si_status);
				1065	if (!retval)
				1066	retval = pid;
				1067	return retval;
				1068	}
				1069
				1070	/*
				1071	* Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold
				1072	* read_lock(&tasklist_lock) on entry. If we return zero, we still hold
				1073	* the lock and this task is uninteresting. If we return nonzero, we have
				1074	* released the lock and the system call should return.
				1075	*/
				1076	static int wait_task_zombie(task_t *p, int noreap,
				1077	struct siginfo __user *infop,
				1078	int __user stat_addr, struct rusage __user ru)
				1079	{
				1080	unsigned long state;
				1081	int retval;
				1082	int status;
				1083
				1084	if (unlikely(noreap)) {
				1085	pid_t pid = p->pid;
				1086	uid_t uid = p->uid;
				1087	int exit_code = p->exit_code;
				1088	int why, status;
				1089
				1090	if (unlikely(p->exit_state != EXIT_ZOMBIE))
				1091	return 0;
				1092	if (unlikely(p->exit_signal == -1 && p->ptrace == 0))
				1093	return 0;
				1094	get_task_struct(p);
				1095	read_unlock(&tasklist_lock);
				1096	if ((exit_code & 0x7f) == 0) {
				1097	why = CLD_EXITED;
				1098	status = exit_code >> 8;
				1099	} else {
				1100	why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
				1101	status = exit_code & 0x7f;
				1102	}
				1103	return wait_noreap_copyout(p, pid, uid, why,
				1104	status, infop, ru);
				1105	}
				1106
				1107	/*
				1108	* Try to move the task's state to DEAD
				1109	* only one thread is allowed to do this:
				1110	*/
				1111	state = xchg(&p->exit_state, EXIT_DEAD);
				1112	if (state != EXIT_ZOMBIE) {
				1113	BUG_ON(state != EXIT_DEAD);
				1114	return 0;
				1115	}
				1116	if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) {
				1117	/*
				1118	* This can only happen in a race with a ptraced thread
				1119	* dying on another processor.
				1120	*/
				1121	return 0;
				1122	}
				1123
				1124	if (likely(p->real_parent == p->parent) && likely(p->signal)) {
Jesper Juhl	3795e16	2006-01-09 20:54:39 -0800	[diff] [blame]	1125	struct signal_struct *psig;
				1126	struct signal_struct *sig;
				1127
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1128	/*
				1129	* The resource counters for the group leader are in its
				1130	* own task_struct. Those for dead threads in the group
				1131	* are in its signal_struct, as are those for the child
				1132	* processes it has previously reaped. All these
				1133	* accumulate in the parent's signal_struct c* fields.
				1134	*
				1135	* We don't bother to take a lock here to protect these
				1136	* p->signal fields, because they are only touched by
				1137	* __exit_signal, which runs with tasklist_lock
				1138	* write-locked anyway, and so is excluded here. We do
				1139	* need to protect the access to p->parent->signal fields,
				1140	* as other threads in the parent group can be right
				1141	* here reaping other children at the same time.
				1142	*/
				1143	spin_lock_irq(&p->parent->sighand->siglock);
Jesper Juhl	3795e16	2006-01-09 20:54:39 -0800	[diff] [blame]	1144	psig = p->parent->signal;
				1145	sig = p->signal;
				1146	psig->cutime =
				1147	cputime_add(psig->cutime,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1148	cputime_add(p->utime,
Jesper Juhl	3795e16	2006-01-09 20:54:39 -0800	[diff] [blame]	1149	cputime_add(sig->utime,
				1150	sig->cutime)));
				1151	psig->cstime =
				1152	cputime_add(psig->cstime,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1153	cputime_add(p->stime,
Jesper Juhl	3795e16	2006-01-09 20:54:39 -0800	[diff] [blame]	1154	cputime_add(sig->stime,
				1155	sig->cstime)));
				1156	psig->cmin_flt +=
				1157	p->min_flt + sig->min_flt + sig->cmin_flt;
				1158	psig->cmaj_flt +=
				1159	p->maj_flt + sig->maj_flt + sig->cmaj_flt;
				1160	psig->cnvcsw +=
				1161	p->nvcsw + sig->nvcsw + sig->cnvcsw;
				1162	psig->cnivcsw +=
				1163	p->nivcsw + sig->nivcsw + sig->cnivcsw;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1164	spin_unlock_irq(&p->parent->sighand->siglock);
				1165	}
				1166
				1167	/*
				1168	* Now we are sure this task is interesting, and no other
				1169	* thread can reap it because we set its state to EXIT_DEAD.
				1170	*/
				1171	read_unlock(&tasklist_lock);
				1172
				1173	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				1174	status = (p->signal->flags & SIGNAL_GROUP_EXIT)
				1175	? p->signal->group_exit_code : p->exit_code;
				1176	if (!retval && stat_addr)
				1177	retval = put_user(status, stat_addr);
				1178	if (!retval && infop)
				1179	retval = put_user(SIGCHLD, &infop->si_signo);
				1180	if (!retval && infop)
				1181	retval = put_user(0, &infop->si_errno);
				1182	if (!retval && infop) {
				1183	int why;
				1184
				1185	if ((status & 0x7f) == 0) {
				1186	why = CLD_EXITED;
				1187	status >>= 8;
				1188	} else {
				1189	why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;
				1190	status &= 0x7f;
				1191	}
				1192	retval = put_user((short)why, &infop->si_code);
				1193	if (!retval)
				1194	retval = put_user(status, &infop->si_status);
				1195	}
				1196	if (!retval && infop)
				1197	retval = put_user(p->pid, &infop->si_pid);
				1198	if (!retval && infop)
				1199	retval = put_user(p->uid, &infop->si_uid);
				1200	if (retval) {
				1201	// TODO: is this safe?
				1202	p->exit_state = EXIT_ZOMBIE;
				1203	return retval;
				1204	}
				1205	retval = p->pid;
				1206	if (p->real_parent != p->parent) {
				1207	write_lock_irq(&tasklist_lock);
				1208	/* Double-check with lock held. */
				1209	if (p->real_parent != p->parent) {
				1210	__ptrace_unlink(p);
				1211	// TODO: is this safe?
				1212	p->exit_state = EXIT_ZOMBIE;
				1213	/*
				1214	* If this is not a detached task, notify the parent.
				1215	* If it's still not detached after that, don't release
				1216	* it now.
				1217	*/
				1218	if (p->exit_signal != -1) {
				1219	do_notify_parent(p, p->exit_signal);
				1220	if (p->exit_signal != -1)
				1221	p = NULL;
				1222	}
				1223	}
				1224	write_unlock_irq(&tasklist_lock);
				1225	}
				1226	if (p != NULL)
				1227	release_task(p);
				1228	BUG_ON(!retval);
				1229	return retval;
				1230	}
				1231
				1232	/*
				1233	* Handle sys_wait4 work for one task in state TASK_STOPPED. We hold
				1234	* read_lock(&tasklist_lock) on entry. If we return zero, we still hold
				1235	* the lock and this task is uninteresting. If we return nonzero, we have
				1236	* released the lock and the system call should return.
				1237	*/
				1238	static int wait_task_stopped(task_t *p, int delayed_group_leader, int noreap,
				1239	struct siginfo __user *infop,
				1240	int __user stat_addr, struct rusage __user ru)
				1241	{
				1242	int retval, exit_code;
				1243
				1244	if (!p->exit_code)
				1245	return 0;
				1246	if (delayed_group_leader && !(p->ptrace & PT_PTRACED) &&
				1247	p->signal && p->signal->group_stop_count > 0)
				1248	/*
				1249	* A group stop is in progress and this is the group leader.
				1250	* We won't report until all threads have stopped.
				1251	*/
				1252	return 0;
				1253
				1254	/*
				1255	* Now we are pretty sure this task is interesting.
				1256	* Make sure it doesn't get reaped out from under us while we
				1257	* give up the lock and then examine it below. We don't want to
				1258	* keep holding onto the tasklist_lock while we call getrusage and
				1259	* possibly take page faults for user memory.
				1260	*/
				1261	get_task_struct(p);
				1262	read_unlock(&tasklist_lock);
				1263
				1264	if (unlikely(noreap)) {
				1265	pid_t pid = p->pid;
				1266	uid_t uid = p->uid;
				1267	int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
				1268
				1269	exit_code = p->exit_code;
				1270	if (unlikely(!exit_code) \|\|
Linus Torvalds	14bf01b	2005-10-01 11:04:18 -0700	[diff] [blame]	1271	unlikely(p->state & TASK_TRACED))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1272	goto bail_ref;
				1273	return wait_noreap_copyout(p, pid, uid,
				1274	why, (exit_code << 8) \| 0x7f,
				1275	infop, ru);
				1276	}
				1277
				1278	write_lock_irq(&tasklist_lock);
				1279
				1280	/*
				1281	* This uses xchg to be atomic with the thread resuming and setting
				1282	* it. It must also be done with the write lock held to prevent a
				1283	* race with the EXIT_ZOMBIE case.
				1284	*/
				1285	exit_code = xchg(&p->exit_code, 0);
				1286	if (unlikely(p->exit_state)) {
				1287	/*
				1288	* The task resumed and then died. Let the next iteration
				1289	* catch it in EXIT_ZOMBIE. Note that exit_code might
				1290	* already be zero here if it resumed and did _exit(0).
				1291	* The task itself is dead and won't touch exit_code again;
				1292	* other processors in this function are locked out.
				1293	*/
				1294	p->exit_code = exit_code;
				1295	exit_code = 0;
				1296	}
				1297	if (unlikely(exit_code == 0)) {
				1298	/*
				1299	* Another thread in this function got to it first, or it
				1300	* resumed, or it resumed and then died.
				1301	*/
				1302	write_unlock_irq(&tasklist_lock);
				1303	bail_ref:
				1304	put_task_struct(p);
				1305	/*
				1306	* We are returning to the wait loop without having successfully
				1307	* removed the process and having released the lock. We cannot
				1308	* continue, since the "p" task pointer is potentially stale.
				1309	*
				1310	* Return -EAGAIN, and do_wait() will restart the loop from the
				1311	* beginning. Do _not_ re-acquire the lock.
				1312	*/
				1313	return -EAGAIN;
				1314	}
				1315
				1316	/* move to end of parent's list to avoid starvation */
				1317	remove_parent(p);
Oleg Nesterov	8fafabd	2006-03-28 16:11:05 -0800	[diff] [blame]	1318	add_parent(p);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1319
				1320	write_unlock_irq(&tasklist_lock);
				1321
				1322	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				1323	if (!retval && stat_addr)
				1324	retval = put_user((exit_code << 8) \| 0x7f, stat_addr);
				1325	if (!retval && infop)
				1326	retval = put_user(SIGCHLD, &infop->si_signo);
				1327	if (!retval && infop)
				1328	retval = put_user(0, &infop->si_errno);
				1329	if (!retval && infop)
				1330	retval = put_user((short)((p->ptrace & PT_PTRACED)
				1331	? CLD_TRAPPED : CLD_STOPPED),
				1332	&infop->si_code);
				1333	if (!retval && infop)
				1334	retval = put_user(exit_code, &infop->si_status);
				1335	if (!retval && infop)
				1336	retval = put_user(p->pid, &infop->si_pid);
				1337	if (!retval && infop)
				1338	retval = put_user(p->uid, &infop->si_uid);
				1339	if (!retval)
				1340	retval = p->pid;
				1341	put_task_struct(p);
				1342
				1343	BUG_ON(!retval);
				1344	return retval;
				1345	}
				1346
				1347	/*
				1348	* Handle do_wait work for one task in a live, non-stopped state.
				1349	* read_lock(&tasklist_lock) on entry. If we return zero, we still hold
				1350	* the lock and this task is uninteresting. If we return nonzero, we have
				1351	* released the lock and the system call should return.
				1352	*/
				1353	static int wait_task_continued(task_t *p, int noreap,
				1354	struct siginfo __user *infop,
				1355	int __user stat_addr, struct rusage __user ru)
				1356	{
				1357	int retval;
				1358	pid_t pid;
				1359	uid_t uid;
				1360
				1361	if (unlikely(!p->signal))
				1362	return 0;
				1363
				1364	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
				1365	return 0;
				1366
				1367	spin_lock_irq(&p->sighand->siglock);
				1368	/* Re-check with the lock held. */
				1369	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {
				1370	spin_unlock_irq(&p->sighand->siglock);
				1371	return 0;
				1372	}
				1373	if (!noreap)
				1374	p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
				1375	spin_unlock_irq(&p->sighand->siglock);
				1376
				1377	pid = p->pid;
				1378	uid = p->uid;
				1379	get_task_struct(p);
				1380	read_unlock(&tasklist_lock);
				1381
				1382	if (!infop) {
				1383	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				1384	put_task_struct(p);
				1385	if (!retval && stat_addr)
				1386	retval = put_user(0xffff, stat_addr);
				1387	if (!retval)
				1388	retval = p->pid;
				1389	} else {
				1390	retval = wait_noreap_copyout(p, pid, uid,
				1391	CLD_CONTINUED, SIGCONT,
				1392	infop, ru);
				1393	BUG_ON(retval == 0);
				1394	}
				1395
				1396	return retval;
				1397	}
				1398
				1399
				1400	static inline int my_ptrace_child(struct task_struct *p)
				1401	{
				1402	if (!(p->ptrace & PT_PTRACED))
				1403	return 0;
				1404	if (!(p->ptrace & PT_ATTACHED))
				1405	return 1;
				1406	/*
				1407	* This child was PTRACE_ATTACH'd. We should be seeing it only if
				1408	* we are the attacher. If we are the real parent, this is a race
				1409	* inside ptrace_attach. It is waiting for the tasklist_lock,
				1410	* which we have to switch the parent links, but has already set
				1411	* the flags in p->ptrace.
				1412	*/
				1413	return (p->parent != p->real_parent);
				1414	}
				1415
				1416	static long do_wait(pid_t pid, int options, struct siginfo __user *infop,
				1417	int __user stat_addr, struct rusage __user ru)
				1418	{
				1419	DECLARE_WAITQUEUE(wait, current);
				1420	struct task_struct *tsk;
				1421	int flag, retval;
				1422
				1423	add_wait_queue(&current->signal->wait_chldexit,&wait);
				1424	repeat:
				1425	/*
				1426	* We will set this flag if we see any child that might later
				1427	* match our criteria, even if we are not able to reap it yet.
				1428	*/
				1429	flag = 0;
				1430	current->state = TASK_INTERRUPTIBLE;
				1431	read_lock(&tasklist_lock);
				1432	tsk = current;
				1433	do {
				1434	struct task_struct *p;
				1435	struct list_head *_p;
				1436	int ret;
				1437
				1438	list_for_each(_p,&tsk->children) {
				1439	p = list_entry(_p,struct task_struct,sibling);
				1440
				1441	ret = eligible_child(pid, options, p);
				1442	if (!ret)
				1443	continue;
				1444
				1445	switch (p->state) {
				1446	case TASK_TRACED:
Roland McGrath	7f2a525	2005-10-30 15:02:50 -0800	[diff] [blame]	1447	/*
				1448	* When we hit the race with PTRACE_ATTACH,
				1449	* we will not report this child. But the
				1450	* race means it has not yet been moved to
				1451	* our ptrace_children list, so we need to
				1452	* set the flag here to avoid a spurious ECHILD
				1453	* when the race happens with the only child.
				1454	*/
				1455	flag = 1;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1456	if (!my_ptrace_child(p))
				1457	continue;
				1458	/FALLTHROUGH/
				1459	case TASK_STOPPED:
				1460	/*
				1461	* It's stopped now, so it might later
				1462	* continue, exit, or stop again.
				1463	*/
				1464	flag = 1;
				1465	if (!(options & WUNTRACED) &&
				1466	!my_ptrace_child(p))
				1467	continue;
				1468	retval = wait_task_stopped(p, ret == 2,
				1469	(options & WNOWAIT),
				1470	infop,
				1471	stat_addr, ru);
				1472	if (retval == -EAGAIN)
				1473	goto repeat;
				1474	if (retval != 0) /* He released the lock. */
				1475	goto end;
				1476	break;
				1477	default:
				1478	// case EXIT_DEAD:
				1479	if (p->exit_state == EXIT_DEAD)
				1480	continue;
				1481	// case EXIT_ZOMBIE:
				1482	if (p->exit_state == EXIT_ZOMBIE) {
				1483	/*
				1484	* Eligible but we cannot release
				1485	* it yet:
				1486	*/
				1487	if (ret == 2)
				1488	goto check_continued;
				1489	if (!likely(options & WEXITED))
				1490	continue;
				1491	retval = wait_task_zombie(
				1492	p, (options & WNOWAIT),
				1493	infop, stat_addr, ru);
				1494	/* He released the lock. */
				1495	if (retval != 0)
				1496	goto end;
				1497	break;
				1498	}
				1499	check_continued:
				1500	/*
				1501	* It's running now, so it might later
				1502	* exit, stop, or stop and then continue.
				1503	*/
				1504	flag = 1;
				1505	if (!unlikely(options & WCONTINUED))
				1506	continue;
				1507	retval = wait_task_continued(
				1508	p, (options & WNOWAIT),
				1509	infop, stat_addr, ru);
				1510	if (retval != 0) /* He released the lock. */
				1511	goto end;
				1512	break;
				1513	}
				1514	}
				1515	if (!flag) {
				1516	list_for_each(_p, &tsk->ptrace_children) {
				1517	p = list_entry(_p, struct task_struct,
				1518	ptrace_list);
				1519	if (!eligible_child(pid, options, p))
				1520	continue;
				1521	flag = 1;
				1522	break;
				1523	}
				1524	}
				1525	if (options & __WNOTHREAD)
				1526	break;
				1527	tsk = next_thread(tsk);
				1528	if (tsk->signal != current->signal)
				1529	BUG();
				1530	} while (tsk != current);
				1531
				1532	read_unlock(&tasklist_lock);
				1533	if (flag) {
				1534	retval = 0;
				1535	if (options & WNOHANG)
				1536	goto end;
				1537	retval = -ERESTARTSYS;
				1538	if (signal_pending(current))
				1539	goto end;
				1540	schedule();
				1541	goto repeat;
				1542	}
				1543	retval = -ECHILD;
				1544	end:
				1545	current->state = TASK_RUNNING;
				1546	remove_wait_queue(&current->signal->wait_chldexit,&wait);
				1547	if (infop) {
				1548	if (retval > 0)
				1549	retval = 0;
				1550	else {
				1551	/*
				1552	* For a WNOHANG return, clear out all the fields
				1553	* we would set so the user can easily tell the
				1554	* difference.
				1555	*/
				1556	if (!retval)
				1557	retval = put_user(0, &infop->si_signo);
				1558	if (!retval)
				1559	retval = put_user(0, &infop->si_errno);
				1560	if (!retval)
				1561	retval = put_user(0, &infop->si_code);
				1562	if (!retval)
				1563	retval = put_user(0, &infop->si_pid);
				1564	if (!retval)
				1565	retval = put_user(0, &infop->si_uid);
				1566	if (!retval)
				1567	retval = put_user(0, &infop->si_status);
				1568	}
				1569	}
				1570	return retval;
				1571	}
				1572
				1573	asmlinkage long sys_waitid(int which, pid_t pid,
				1574	struct siginfo __user *infop, int options,
				1575	struct rusage __user *ru)
				1576	{
				1577	long ret;
				1578
				1579	if (options & ~(WNOHANG\|WNOWAIT\|WEXITED\|WSTOPPED\|WCONTINUED))
				1580	return -EINVAL;
				1581	if (!(options & (WEXITED\|WSTOPPED\|WCONTINUED)))
				1582	return -EINVAL;
				1583
				1584	switch (which) {
				1585	case P_ALL:
				1586	pid = -1;
				1587	break;
				1588	case P_PID:
				1589	if (pid <= 0)
				1590	return -EINVAL;
				1591	break;
				1592	case P_PGID:
				1593	if (pid <= 0)
				1594	return -EINVAL;
				1595	pid = -pid;
				1596	break;
				1597	default:
				1598	return -EINVAL;
				1599	}
				1600
				1601	ret = do_wait(pid, options, infop, NULL, ru);
				1602
				1603	/* avoid REGPARM breakage on x86: */
				1604	prevent_tail_call(ret);
				1605	return ret;
				1606	}
				1607
				1608	asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr,
				1609	int options, struct rusage __user *ru)
				1610	{
				1611	long ret;
				1612
				1613	if (options & ~(WNOHANG\|WUNTRACED\|WCONTINUED\|
				1614	__WNOTHREAD\|__WCLONE\|__WALL))
				1615	return -EINVAL;
				1616	ret = do_wait(pid, options \| WEXITED, NULL, stat_addr, ru);
				1617
				1618	/* avoid REGPARM breakage on x86: */
				1619	prevent_tail_call(ret);
				1620	return ret;
				1621	}
				1622
				#ifdef __ARCH_WANT_SYS_WAITPID

				/*
				 * sys_waitpid() is kept only for compatibility; waitpid() should be
				 * implemented in libc on top of sys_wait4(). This wrapper forwards
				 * its three arguments to sys_wait4() with a NULL rusage pointer and
				 * returns its result unchanged.
				 */
				asmlinkage long sys_waitpid(pid_t pid, int __user *stat_addr, int options)
				{
					long result = sys_wait4(pid, stat_addr, options, NULL);

					return result;
				}

				#endif