/*
 *  fs/signalfd.c
 *
 *  Copyright (C) 2003  Linus Torvalds
 *
 *  Mon Mar 5, 2007: Davide Libenzi <davidel@xmailserver.org>
 *      Changed ->read() to return a siginfo structure instead of signal number.
 *      Fixed locking in ->poll().
 *      Added sighand-detach notification.
 *      Added fd re-use in sys_signalfd() syscall.
 *      Now using anonymous inode source.
 *      Thanks to Oleg Nesterov for useful code review and suggestions.
 *      More comments and suggestions from Arnd Bergmann.
 *  Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br>
 *      Retrieve multiple signals with one read() call
 */
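
/*
 * Overview of the interface implemented below: sys_signalfd() returns a
 * file descriptor bound to the caller's sighand. read() on that descriptor
 * dequeues pending signals matching the registered mask and returns them
 * as fixed-size struct signalfd_siginfo records, and poll() reports POLLIN
 * while such a signal is pending. If the sighand the descriptor is attached
 * to gets detached, readers are woken and see POLLIN plus a zero-length
 * read(), much like a socket whose peer has disconnected.
 */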

#include <linux/file.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/list.h>
#include <linux/anon_inodes.h>
#include <linux/signalfd.h>

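/*
 * Per-signalfd file context, hung off file->private_data: "lnk" links it
 * into the owning task's sighand->signalfd_list, "wqh" is where readers
 * and pollers sleep, "sigmask" is the (inverted) set of signals the
 * descriptor listens for, and "tsk" is the task whose signal state we
 * read from (cleared once the sighand is detached).
 */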
struct signalfd_ctx {
	struct list_head lnk;
	wait_queue_head_t wqh;
	sigset_t sigmask;
	struct task_struct *tsk;
};

struct signalfd_lockctx {
	struct task_struct *tsk;
	unsigned long flags;
};

/*
 * Tries to acquire the sighand lock. We do not increment the sighand
 * use count, and we do not even pin the task struct, so we need to
 * do it inside an RCU read lock, and we must be prepared for the
 * ctx->tsk going to NULL (in signalfd_deliver()), and for the sighand
 * being detached. We return 0 if the sighand has been detached, or
 * 1 if we were able to pin the sighand lock.
 */
static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk)
{
	struct sighand_struct *sighand = NULL;

	rcu_read_lock();
	lk->tsk = rcu_dereference(ctx->tsk);
	if (likely(lk->tsk != NULL))
		sighand = lock_task_sighand(lk->tsk, &lk->flags);
	rcu_read_unlock();

	if (sighand && !ctx->tsk) {
		unlock_task_sighand(lk->tsk, &lk->flags);
		sighand = NULL;
	}

	return sighand != NULL;
}

static void signalfd_unlock(struct signalfd_lockctx *lk)
{
	unlock_task_sighand(lk->tsk, &lk->flags);
}
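
/*
 * Typical calling pattern for the two helpers above (a sketch of how the
 * rest of this file uses them, not additional API):
 *
 *	struct signalfd_lockctx lk;
 *
 *	if (signalfd_lock(ctx, &lk)) {
 *		... touch lk.tsk signal state under the sighand lock ...
 *		signalfd_unlock(&lk);
 *	} else {
 *		... sighand already detached: report POLLIN / zero-length read ...
 *	}
 */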

/*
 * This must be called with the sighand lock held.
 */
void signalfd_deliver(struct task_struct *tsk, int sig)
{
	struct sighand_struct *sighand = tsk->sighand;
	struct signalfd_ctx *ctx, *tmp;

	BUG_ON(!sig);
	list_for_each_entry_safe(ctx, tmp, &sighand->signalfd_list, lnk) {
		/*
		 * We use a negative signal value as a way to broadcast that the
		 * sighand has been orphaned, so that we can notify all the
		 * listeners about this. Remember the ctx->sigmask is inverted,
		 * so if the user is interested in a signal, that corresponding
		 * bit will be zero.
		 */
		if (sig < 0) {
			if (ctx->tsk == tsk) {
				ctx->tsk = NULL;
				list_del_init(&ctx->lnk);
				wake_up(&ctx->wqh);
			}
		} else {
			if (!sigismember(&ctx->sigmask, sig))
				wake_up(&ctx->wqh);
		}
	}
}

static void signalfd_cleanup(struct signalfd_ctx *ctx)
{
	struct signalfd_lockctx lk;

	/*
	 * This is tricky. If the sighand is gone, we do not need to remove
	 * the context from the list; the list itself won't be there anymore.
	 */
	if (signalfd_lock(ctx, &lk)) {
		list_del(&ctx->lnk);
		signalfd_unlock(&lk);
	}
	kfree(ctx);
}

static int signalfd_release(struct inode *inode, struct file *file)
{
	signalfd_cleanup(file->private_data);
	return 0;
}

static unsigned int signalfd_poll(struct file *file, poll_table *wait)
{
	struct signalfd_ctx *ctx = file->private_data;
	unsigned int events = 0;
	struct signalfd_lockctx lk;

	poll_wait(file, &ctx->wqh, wait);

	/*
	 * If the sighand has been detached, let the caller get a POLLIN
	 * anyway, a la socket recv() when the peer disconnects.
	 */
	if (signalfd_lock(ctx, &lk)) {
		if ((lk.tsk == current &&
		     next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) ||
		    next_signal(&lk.tsk->signal->shared_pending,
				&ctx->sigmask) > 0)
			events |= POLLIN;
		signalfd_unlock(&lk);
	} else
		events |= POLLIN;

	return events;
}

/*
 * Copied from copy_siginfo_to_user() in kernel/signal.c
 */
static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
			     siginfo_t const *kinfo)
{
	long err;

	BUILD_BUG_ON(sizeof(struct signalfd_siginfo) != 128);

	/*
	 * Unused members should be zero ...
	 */
	err = __clear_user(uinfo, sizeof(*uinfo));

	/*
	 * If you change the siginfo_t structure, please be sure
	 * this code is fixed accordingly.
	 */
	err |= __put_user(kinfo->si_signo, &uinfo->signo);
	err |= __put_user(kinfo->si_errno, &uinfo->err);
	err |= __put_user((short)kinfo->si_code, &uinfo->code);
	switch (kinfo->si_code & __SI_MASK) {
	case __SI_KILL:
		err |= __put_user(kinfo->si_pid, &uinfo->pid);
		err |= __put_user(kinfo->si_uid, &uinfo->uid);
		break;
	case __SI_TIMER:
		err |= __put_user(kinfo->si_tid, &uinfo->tid);
		err |= __put_user(kinfo->si_overrun, &uinfo->overrun);
		err |= __put_user((long)kinfo->si_ptr, &uinfo->svptr);
		break;
	case __SI_POLL:
		err |= __put_user(kinfo->si_band, &uinfo->band);
		err |= __put_user(kinfo->si_fd, &uinfo->fd);
		break;
	case __SI_FAULT:
		err |= __put_user((long)kinfo->si_addr, &uinfo->addr);
#ifdef __ARCH_SI_TRAPNO
		err |= __put_user(kinfo->si_trapno, &uinfo->trapno);
#endif
		break;
	case __SI_CHLD:
		err |= __put_user(kinfo->si_pid, &uinfo->pid);
		err |= __put_user(kinfo->si_uid, &uinfo->uid);
		err |= __put_user(kinfo->si_status, &uinfo->status);
		err |= __put_user(kinfo->si_utime, &uinfo->utime);
		err |= __put_user(kinfo->si_stime, &uinfo->stime);
		break;
	case __SI_RT: /* This is not generated by the kernel as of now. */
	case __SI_MESGQ: /* But this is. */
		err |= __put_user(kinfo->si_pid, &uinfo->pid);
		err |= __put_user(kinfo->si_uid, &uinfo->uid);
		err |= __put_user((long)kinfo->si_ptr, &uinfo->svptr);
		break;
	default: /* This is just in case for now ... */
		err |= __put_user(kinfo->si_pid, &uinfo->pid);
		err |= __put_user(kinfo->si_uid, &uinfo->uid);
		break;
	}

	return err ? -EFAULT : sizeof(*uinfo);
}

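/*
 * Dequeue one signal matching ctx->sigmask from the attached task.
 * Returns the signal number on success, 0 if the sighand has been
 * detached in the meantime, -EAGAIN if nothing is pending and the
 * caller asked for a non-blocking operation, or -ERESTARTSYS if the
 * wait was interrupted by a signal directed at the caller itself.
 */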
static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info,
				int nonblock)
{
	ssize_t ret;
	struct signalfd_lockctx lk;
	DECLARE_WAITQUEUE(wait, current);

	if (!signalfd_lock(ctx, &lk))
		return 0;

	ret = dequeue_signal(lk.tsk, &ctx->sigmask, info);
	switch (ret) {
	case 0:
		if (!nonblock)
			break;
		ret = -EAGAIN;
	default:
		signalfd_unlock(&lk);
		return ret;
	}

	add_wait_queue(&ctx->wqh, &wait);
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		ret = dequeue_signal(lk.tsk, &ctx->sigmask, info);
		signalfd_unlock(&lk);
		if (ret != 0)
			break;
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		schedule();
		ret = signalfd_lock(ctx, &lk);
		if (unlikely(!ret)) {
			/*
			 * Let the caller read zero bytes, a la socket
			 * recv() when the peer disconnects. This test
			 * must be done before doing a dequeue_signal(),
			 * because if the sighand has been orphaned,
			 * the dequeue_signal() call is going to crash
			 * because ->sighand will be long gone.
			 */
			break;
		}
	}

	remove_wait_queue(&ctx->wqh, &wait);
	__set_current_state(TASK_RUNNING);

	return ret;
}

/*
 * Returns a multiple of the size of a "struct signalfd_siginfo", or zero
 * if the sighand we are attached to has been orphaned. The "count"
 * parameter must be at least the size of a "struct signalfd_siginfo".
 */
static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
			     loff_t *ppos)
{
	struct signalfd_ctx *ctx = file->private_data;
	struct signalfd_siginfo __user *siginfo;
	int nonblock = file->f_flags & O_NONBLOCK;
	ssize_t ret, total = 0;
	siginfo_t info;

	count /= sizeof(struct signalfd_siginfo);
	if (!count)
		return -EINVAL;

	siginfo = (struct signalfd_siginfo __user *) buf;

	do {
		ret = signalfd_dequeue(ctx, &info, nonblock);
		if (unlikely(ret <= 0))
			break;
		ret = signalfd_copyinfo(siginfo, &info);
		if (ret < 0)
			break;
		siginfo++;
		total += ret;
		nonblock = 1;
	} while (--count);

	return total ? total : ret;
}

static const struct file_operations signalfd_fops = {
	.release	= signalfd_release,
	.poll		= signalfd_poll,
	.read		= signalfd_read,
};

/*
 * Create a file descriptor that is associated with our signal
 * state. We can pass it around to others if we want to, but
 * it will always be _our_ signal state.
 */
asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
{
	int error;
	sigset_t sigmask;
	struct signalfd_ctx *ctx;
	struct sighand_struct *sighand;
	struct file *file;
	struct inode *inode;
	struct signalfd_lockctx lk;

	if (sizemask != sizeof(sigset_t) ||
	    copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
		return -EINVAL;
	sigdelsetmask(&sigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
	signotset(&sigmask);

	if (ufd == -1) {
		ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
		if (!ctx)
			return -ENOMEM;

		init_waitqueue_head(&ctx->wqh);
		ctx->sigmask = sigmask;
		ctx->tsk = current;

		sighand = current->sighand;
		/*
		 * Add this fd to the list of signal listeners.
		 */
		spin_lock_irq(&sighand->siglock);
		list_add_tail(&ctx->lnk, &sighand->signalfd_list);
		spin_unlock_irq(&sighand->siglock);

		/*
		 * When we call this, the initialization must be complete, since
		 * anon_inode_getfd() will install the fd.
		 */
		error = anon_inode_getfd(&ufd, &inode, &file, "[signalfd]",
					 &signalfd_fops, ctx);
		if (error)
			goto err_fdalloc;
	} else {
		file = fget(ufd);
		if (!file)
			return -EBADF;
		ctx = file->private_data;
		if (file->f_op != &signalfd_fops) {
			fput(file);
			return -EINVAL;
		}
		/*
		 * We need to be prepared for the fact that the sighand this fd
		 * is attached to has been detached. In that case signalfd_lock()
		 * will return 0, and we'll just skip setting the new mask.
		 */
		if (signalfd_lock(ctx, &lk)) {
			ctx->sigmask = sigmask;
			signalfd_unlock(&lk);
		}
		wake_up(&ctx->wqh);
		fput(file);
	}

	return ufd;

err_fdalloc:
	signalfd_cleanup(ctx);
	return error;
}
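
/*
 * Illustrative userspace usage (a sketch only, not part of the kernel
 * build; it assumes a libc without a signalfd() wrapper, hence the raw
 * syscall(), and uses the field names filled in by signalfd_copyinfo()
 * above).  Note that the "sizemask" argument must match the size of the
 * kernel's sigset_t (8 bytes for 64 signals), not the much larger glibc
 * sigset_t:
 *
 *	sigset_t mask;
 *	struct signalfd_siginfo info;
 *	ssize_t bytes;
 *	int sfd;
 *
 *	sigemptyset(&mask);
 *	sigaddset(&mask, SIGINT);
 *	sigprocmask(SIG_BLOCK, &mask, NULL);    // keep SIGINT pending for the fd
 *
 *	sfd = syscall(__NR_signalfd, -1, &mask, 8);
 *	bytes = read(sfd, &info, sizeof(info)); // blocks until SIGINT arrives
 *	if (bytes == sizeof(info))
 *		handle_signal(info.signo);      // hypothetical handler
 *	else if (bytes == 0)
 *		close(sfd);                     // sighand was detached
 */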