| /* | 
 |  *  fs/signalfd.c | 
 |  * | 
 |  *  Copyright (C) 2003  Linus Torvalds | 
 |  * | 
 |  *  Mon Mar 5, 2007: Davide Libenzi <davidel@xmailserver.org> | 
 |  *      Changed ->read() to return a siginfo structure instead of signal number. | 
 |  *      Fixed locking in ->poll(). | 
 |  *      Added sighand-detach notification. | 
 |  *      Added fd re-use in sys_signalfd() syscall. | 
 |  *      Now using anonymous inode source. | 
 |  *      Thanks to Oleg Nesterov for useful code review and suggestions. | 
 |  *      More comments and suggestions from Arnd Bergmann. | 
 |  * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br> | 
 |  *      Retrieve multiple signals with one read() call | 
 |  */ | 
 |  | 
 | #include <linux/file.h> | 
 | #include <linux/poll.h> | 
 | #include <linux/init.h> | 
 | #include <linux/fs.h> | 
 | #include <linux/sched.h> | 
 | #include <linux/kernel.h> | 
 | #include <linux/signal.h> | 
 | #include <linux/list.h> | 
 | #include <linux/anon_inodes.h> | 
 | #include <linux/signalfd.h> | 
 |  | 
 | struct signalfd_ctx { | 
 | 	struct list_head lnk; | 
 | 	wait_queue_head_t wqh; | 
 | 	sigset_t sigmask; | 
 | 	struct task_struct *tsk; | 
 | }; | 
 |  | 
 | struct signalfd_lockctx { | 
 | 	struct task_struct *tsk; | 
 | 	unsigned long flags; | 
 | }; | 
 |  | 
 | /* | 
 |  * Tries to acquire the sighand lock. We do not increment the sighand | 
 |  * use count, and we do not even pin the task struct, so we need to | 
 |  * do it inside an RCU read lock, and we must be prepared for the | 
 |  * ctx->tsk going to NULL (in signalfd_deliver()), and for the sighand | 
 |  * being detached. We return 0 if the sighand has been detached, or | 
 |  * 1 if we were able to pin the sighand lock. | 
 |  */ | 
 | static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk) | 
 | { | 
 | 	struct sighand_struct *sighand = NULL; | 
 |  | 
 | 	rcu_read_lock(); | 
 | 	lk->tsk = rcu_dereference(ctx->tsk); | 
 | 	if (likely(lk->tsk != NULL)) | 
 | 		sighand = lock_task_sighand(lk->tsk, &lk->flags); | 
 | 	rcu_read_unlock(); | 
 |  | 
 | 	if (sighand && !ctx->tsk) { | 
 | 		unlock_task_sighand(lk->tsk, &lk->flags); | 
 | 		sighand = NULL; | 
 | 	} | 
 |  | 
 | 	return sighand != NULL; | 
 | } | 
 |  | 
 | static void signalfd_unlock(struct signalfd_lockctx *lk) | 
 | { | 
 | 	unlock_task_sighand(lk->tsk, &lk->flags); | 
 | } | 
 |  | 
 | /* | 
 |  * This must be called with the sighand lock held. | 
 |  */ | 
 | void signalfd_deliver(struct task_struct *tsk, int sig) | 
 | { | 
 | 	struct sighand_struct *sighand = tsk->sighand; | 
 | 	struct signalfd_ctx *ctx, *tmp; | 
 |  | 
 | 	BUG_ON(!sig); | 
 | 	list_for_each_entry_safe(ctx, tmp, &sighand->signalfd_list, lnk) { | 
 | 		/* | 
 | 		 * We use a negative signal value as a way to broadcast that the | 
 | 		 * sighand has been orphaned, so that we can notify all the | 
 | 		 * listeners about this. Remember the ctx->sigmask is inverted, | 
 | 		 * so if the user is interested in a signal, that corresponding | 
 | 		 * bit will be zero. | 
 | 		 */ | 
 | 		if (sig < 0) { | 
 | 			if (ctx->tsk == tsk) { | 
 | 				ctx->tsk = NULL; | 
 | 				list_del_init(&ctx->lnk); | 
 | 				wake_up(&ctx->wqh); | 
 | 			} | 
 | 		} else { | 
 | 			if (!sigismember(&ctx->sigmask, sig)) | 
 | 				wake_up(&ctx->wqh); | 
 | 		} | 
 | 	} | 
 | } | 
 |  | 
 | static void signalfd_cleanup(struct signalfd_ctx *ctx) | 
 | { | 
 | 	struct signalfd_lockctx lk; | 
 |  | 
 | 	/* | 
 | 	 * This is tricky. If the sighand is gone, we do not need to remove | 
 | 	 * context from the list, the list itself won't be there anymore. | 
 | 	 */ | 
 | 	if (signalfd_lock(ctx, &lk)) { | 
 | 		list_del(&ctx->lnk); | 
 | 		signalfd_unlock(&lk); | 
 | 	} | 
 | 	kfree(ctx); | 
 | } | 
 |  | 
 | static int signalfd_release(struct inode *inode, struct file *file) | 
 | { | 
 | 	signalfd_cleanup(file->private_data); | 
 | 	return 0; | 
 | } | 
 |  | 
 | static unsigned int signalfd_poll(struct file *file, poll_table *wait) | 
 | { | 
 | 	struct signalfd_ctx *ctx = file->private_data; | 
 | 	unsigned int events = 0; | 
 | 	struct signalfd_lockctx lk; | 
 |  | 
 | 	poll_wait(file, &ctx->wqh, wait); | 
 |  | 
 | 	/* | 
 | 	 * Let the caller get a POLLIN in this case, ala socket recv() when | 
 | 	 * the peer disconnects. | 
 | 	 */ | 
 | 	if (signalfd_lock(ctx, &lk)) { | 
 | 		if ((lk.tsk == current && | 
 | 		     next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) || | 
 | 		    next_signal(&lk.tsk->signal->shared_pending, | 
 | 				&ctx->sigmask) > 0) | 
 | 			events |= POLLIN; | 
 | 		signalfd_unlock(&lk); | 
 | 	} else | 
 | 		events |= POLLIN; | 
 |  | 
 | 	return events; | 
 | } | 
 |  | 
 | /* | 
 |  * Copied from copy_siginfo_to_user() in kernel/signal.c | 
 |  */ | 
 | static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, | 
 | 			     siginfo_t const *kinfo) | 
 | { | 
 | 	long err; | 
 |  | 
 | 	BUILD_BUG_ON(sizeof(struct signalfd_siginfo) != 128); | 
 |  | 
 | 	/* | 
 | 	 * Unused memebers should be zero ... | 
 | 	 */ | 
 | 	err = __clear_user(uinfo, sizeof(*uinfo)); | 
 |  | 
 | 	/* | 
 | 	 * If you change siginfo_t structure, please be sure | 
 | 	 * this code is fixed accordingly. | 
 | 	 */ | 
 | 	err |= __put_user(kinfo->si_signo, &uinfo->signo); | 
 | 	err |= __put_user(kinfo->si_errno, &uinfo->err); | 
 | 	err |= __put_user((short)kinfo->si_code, &uinfo->code); | 
 | 	switch (kinfo->si_code & __SI_MASK) { | 
 | 	case __SI_KILL: | 
 | 		err |= __put_user(kinfo->si_pid, &uinfo->pid); | 
 | 		err |= __put_user(kinfo->si_uid, &uinfo->uid); | 
 | 		break; | 
 | 	case __SI_TIMER: | 
 | 		 err |= __put_user(kinfo->si_tid, &uinfo->tid); | 
 | 		 err |= __put_user(kinfo->si_overrun, &uinfo->overrun); | 
 | 		 err |= __put_user((long)kinfo->si_ptr, &uinfo->svptr); | 
 | 		break; | 
 | 	case __SI_POLL: | 
 | 		err |= __put_user(kinfo->si_band, &uinfo->band); | 
 | 		err |= __put_user(kinfo->si_fd, &uinfo->fd); | 
 | 		break; | 
 | 	case __SI_FAULT: | 
 | 		err |= __put_user((long)kinfo->si_addr, &uinfo->addr); | 
 | #ifdef __ARCH_SI_TRAPNO | 
 | 		err |= __put_user(kinfo->si_trapno, &uinfo->trapno); | 
 | #endif | 
 | 		break; | 
 | 	case __SI_CHLD: | 
 | 		err |= __put_user(kinfo->si_pid, &uinfo->pid); | 
 | 		err |= __put_user(kinfo->si_uid, &uinfo->uid); | 
 | 		err |= __put_user(kinfo->si_status, &uinfo->status); | 
 | 		err |= __put_user(kinfo->si_utime, &uinfo->utime); | 
 | 		err |= __put_user(kinfo->si_stime, &uinfo->stime); | 
 | 		break; | 
 | 	case __SI_RT: /* This is not generated by the kernel as of now. */ | 
 | 	case __SI_MESGQ: /* But this is */ | 
 | 		err |= __put_user(kinfo->si_pid, &uinfo->pid); | 
 | 		err |= __put_user(kinfo->si_uid, &uinfo->uid); | 
 | 		err |= __put_user((long)kinfo->si_ptr, &uinfo->svptr); | 
 | 		break; | 
 | 	default: /* this is just in case for now ... */ | 
 | 		err |= __put_user(kinfo->si_pid, &uinfo->pid); | 
 | 		err |= __put_user(kinfo->si_uid, &uinfo->uid); | 
 | 		break; | 
 | 	} | 
 |  | 
 | 	return err ? -EFAULT: sizeof(*uinfo); | 
 | } | 
 |  | 
 | static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info, | 
 | 				int nonblock) | 
 | { | 
 | 	ssize_t ret; | 
 | 	struct signalfd_lockctx lk; | 
 | 	DECLARE_WAITQUEUE(wait, current); | 
 |  | 
 | 	if (!signalfd_lock(ctx, &lk)) | 
 | 		return 0; | 
 |  | 
 | 	ret = dequeue_signal(lk.tsk, &ctx->sigmask, info); | 
 | 	switch (ret) { | 
 | 	case 0: | 
 | 		if (!nonblock) | 
 | 			break; | 
 | 		ret = -EAGAIN; | 
 | 	default: | 
 | 		signalfd_unlock(&lk); | 
 | 		return ret; | 
 | 	} | 
 |  | 
 | 	add_wait_queue(&ctx->wqh, &wait); | 
 | 	for (;;) { | 
 | 		set_current_state(TASK_INTERRUPTIBLE); | 
 | 		ret = dequeue_signal(lk.tsk, &ctx->sigmask, info); | 
 | 		signalfd_unlock(&lk); | 
 | 		if (ret != 0) | 
 | 			break; | 
 | 		if (signal_pending(current)) { | 
 | 			ret = -ERESTARTSYS; | 
 | 			break; | 
 | 		} | 
 | 		schedule(); | 
 | 		ret = signalfd_lock(ctx, &lk); | 
 | 		if (unlikely(!ret)) { | 
 | 			/* | 
 | 			 * Let the caller read zero byte, ala socket | 
 | 			 * recv() when the peer disconnect. This test | 
 | 			 * must be done before doing a dequeue_signal(), | 
 | 			 * because if the sighand has been orphaned, | 
 | 			 * the dequeue_signal() call is going to crash | 
 | 			 * because ->sighand will be long gone. | 
 | 			 */ | 
 | 			 break; | 
 | 		} | 
 | 	} | 
 |  | 
 | 	remove_wait_queue(&ctx->wqh, &wait); | 
 | 	__set_current_state(TASK_RUNNING); | 
 |  | 
 | 	return ret; | 
 | } | 
 |  | 
 | /* | 
 |  * Returns either the size of a "struct signalfd_siginfo", or zero if the | 
 |  * sighand we are attached to, has been orphaned. The "count" parameter | 
 |  * must be at least the size of a "struct signalfd_siginfo". | 
 |  */ | 
 | static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, | 
 | 			     loff_t *ppos) | 
 | { | 
 | 	struct signalfd_ctx *ctx = file->private_data; | 
 | 	struct signalfd_siginfo __user *siginfo; | 
 | 	int nonblock = file->f_flags & O_NONBLOCK; | 
 | 	ssize_t ret, total = 0; | 
 | 	siginfo_t info; | 
 |  | 
 | 	count /= sizeof(struct signalfd_siginfo); | 
 | 	if (!count) | 
 | 		return -EINVAL; | 
 |  | 
 | 	siginfo = (struct signalfd_siginfo __user *) buf; | 
 |  | 
 | 	do { | 
 | 		ret = signalfd_dequeue(ctx, &info, nonblock); | 
 | 		if (unlikely(ret <= 0)) | 
 | 			break; | 
 | 		ret = signalfd_copyinfo(siginfo, &info); | 
 | 		if (ret < 0) | 
 | 			break; | 
 | 		siginfo++; | 
 | 		total += ret; | 
 | 		nonblock = 1; | 
 | 	} while (--count); | 
 |  | 
 | 	return total ? total : ret; | 
 | } | 
 |  | 
 | static const struct file_operations signalfd_fops = { | 
 | 	.release	= signalfd_release, | 
 | 	.poll		= signalfd_poll, | 
 | 	.read		= signalfd_read, | 
 | }; | 
 |  | 
 | /* | 
 |  * Create a file descriptor that is associated with our signal | 
 |  * state. We can pass it around to others if we want to, but | 
 |  * it will always be _our_ signal state. | 
 |  */ | 
 | asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) | 
 | { | 
 | 	int error; | 
 | 	sigset_t sigmask; | 
 | 	struct signalfd_ctx *ctx; | 
 | 	struct sighand_struct *sighand; | 
 | 	struct file *file; | 
 | 	struct inode *inode; | 
 | 	struct signalfd_lockctx lk; | 
 |  | 
 | 	if (sizemask != sizeof(sigset_t) || | 
 | 	    copy_from_user(&sigmask, user_mask, sizeof(sigmask))) | 
 | 		return error = -EINVAL; | 
 | 	sigdelsetmask(&sigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); | 
 | 	signotset(&sigmask); | 
 |  | 
 | 	if (ufd == -1) { | 
 | 		ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | 
 | 		if (!ctx) | 
 | 			return -ENOMEM; | 
 |  | 
 | 		init_waitqueue_head(&ctx->wqh); | 
 | 		ctx->sigmask = sigmask; | 
 | 		ctx->tsk = current; | 
 |  | 
 | 		sighand = current->sighand; | 
 | 		/* | 
 | 		 * Add this fd to the list of signal listeners. | 
 | 		 */ | 
 | 		spin_lock_irq(&sighand->siglock); | 
 | 		list_add_tail(&ctx->lnk, &sighand->signalfd_list); | 
 | 		spin_unlock_irq(&sighand->siglock); | 
 |  | 
 | 		/* | 
 | 		 * When we call this, the initialization must be complete, since | 
 | 		 * anon_inode_getfd() will install the fd. | 
 | 		 */ | 
 | 		error = anon_inode_getfd(&ufd, &inode, &file, "[signalfd]", | 
 | 					 &signalfd_fops, ctx); | 
 | 		if (error) | 
 | 			goto err_fdalloc; | 
 | 	} else { | 
 | 		file = fget(ufd); | 
 | 		if (!file) | 
 | 			return -EBADF; | 
 | 		ctx = file->private_data; | 
 | 		if (file->f_op != &signalfd_fops) { | 
 | 			fput(file); | 
 | 			return -EINVAL; | 
 | 		} | 
 | 		/* | 
 | 		 * We need to be prepared of the fact that the sighand this fd | 
 | 		 * is attached to, has been detched. In that case signalfd_lock() | 
 | 		 * will return 0, and we'll just skip setting the new mask. | 
 | 		 */ | 
 | 		if (signalfd_lock(ctx, &lk)) { | 
 | 			ctx->sigmask = sigmask; | 
 | 			signalfd_unlock(&lk); | 
 | 		} | 
 | 		wake_up(&ctx->wqh); | 
 | 		fput(file); | 
 | 	} | 
 |  | 
 | 	return ufd; | 
 |  | 
 | err_fdalloc: | 
 | 	signalfd_cleanup(ctx); | 
 | 	return error; | 
 | } | 
 |  |