/* Copyright (C) 2009 Red Hat, Inc.
 * Copyright (C) 2006 Rusty Russell IBM Corporation
 *
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Inspiration, some code, and most witty comments come from
 * Documentation/virtual/lguest/lguest.c, by Rusty Russell
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Generic code for virtio server in host kernel.
 */

#include <linux/eventfd.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/cgroup.h>

#include "vhost.h"

enum {
	VHOST_MEMORY_MAX_NREGIONS = 64,
	VHOST_MEMORY_F_LOG = 0x1,
};

static unsigned vhost_zcopy_mask __read_mostly;

#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num])
#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num])

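/* poll_table callback: remember the file's wait queue head and add the
 * vhost_poll's wait entry so vhost_poll_wakeup() runs on wakeups. */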
static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
			    poll_table *pt)
{
	struct vhost_poll *poll;

	poll = container_of(pt, struct vhost_poll, table);
	poll->wqh = wqh;
	add_wait_queue(wqh, &poll->wait);
}

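/* Wait queue callback: if the wakeup key reports an event in poll->mask,
 * queue the poll's work item for the worker thread. */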
static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
			     void *key)
{
	struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);

	if (!((unsigned long)key & poll->mask))
		return 0;

	vhost_poll_queue(poll);
	return 0;
}

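/* Initialize a work item that the device's worker thread will run via fn. */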
void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
{
	INIT_LIST_HEAD(&work->node);
	work->fn = fn;
	init_waitqueue_head(&work->done);
	work->flushing = 0;
	work->queue_seq = work->done_seq = 0;
}

/* Init poll structure */
void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
		     unsigned long mask, struct vhost_dev *dev)
{
	init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
	init_poll_funcptr(&poll->table, vhost_poll_func);
	poll->mask = mask;
	poll->dev = dev;
	poll->wqh = NULL;

	vhost_work_init(&poll->work, fn);
}

/* Start polling a file. We add ourselves to file's wait queue. The caller must
 * keep a reference to a file until after vhost_poll_stop is called. */
int vhost_poll_start(struct vhost_poll *poll, struct file *file)
{
	unsigned long mask;
	int ret = 0;

	mask = file->f_op->poll(file, &poll->table);
	if (mask)
		vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
	if (mask & POLLERR) {
		if (poll->wqh)
			remove_wait_queue(poll->wqh, &poll->wait);
		ret = -EINVAL;
	}

	return ret;
}

/* Stop polling a file. After this function returns, it becomes safe to drop the
 * file reference. You must also flush afterwards. */
void vhost_poll_stop(struct vhost_poll *poll)
{
	if (poll->wqh) {
		remove_wait_queue(poll->wqh, &poll->wait);
		poll->wqh = NULL;
	}
}

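/* Return true once the worker has completed all work queued up to seq. */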
static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work,
				unsigned seq)
{
	int left;

	spin_lock_irq(&dev->work_lock);
	left = seq - work->done_seq;
	spin_unlock_irq(&dev->work_lock);
	return left <= 0;
}

static void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
{
	unsigned seq;
	int flushing;

	spin_lock_irq(&dev->work_lock);
	seq = work->queue_seq;
	work->flushing++;
	spin_unlock_irq(&dev->work_lock);
	wait_event(work->done, vhost_work_seq_done(dev, work, seq));
	spin_lock_irq(&dev->work_lock);
	flushing = --work->flushing;
	spin_unlock_irq(&dev->work_lock);
	BUG_ON(flushing < 0);
}

/* Flush any work that has been scheduled. When calling this, don't hold any
 * locks that are also used by the callback. */
void vhost_poll_flush(struct vhost_poll *poll)
{
	vhost_work_flush(poll->dev, &poll->work);
}

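/* Queue work for the worker thread unless it is already pending, and wake
 * the worker up. */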
void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
{
	unsigned long flags;

	spin_lock_irqsave(&dev->work_lock, flags);
	if (list_empty(&work->node)) {
		list_add_tail(&work->node, &dev->work_list);
		work->queue_seq++;
		wake_up_process(dev->worker);
	}
	spin_unlock_irqrestore(&dev->work_lock, flags);
}

void vhost_poll_queue(struct vhost_poll *poll)
{
	vhost_work_queue(poll->dev, &poll->work);
}

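/* Reset a virtqueue to its default, unconfigured state. Used both when the
 * device is initialized and when it is cleaned up. */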
static void vhost_vq_reset(struct vhost_dev *dev,
			   struct vhost_virtqueue *vq)
{
	vq->num = 1;
	vq->desc = NULL;
	vq->avail = NULL;
	vq->used = NULL;
	vq->last_avail_idx = 0;
	vq->avail_idx = 0;
	vq->last_used_idx = 0;
	vq->signalled_used = 0;
	vq->signalled_used_valid = false;
	vq->used_flags = 0;
	vq->log_used = false;
	vq->log_addr = -1ull;
	vq->vhost_hlen = 0;
	vq->sock_hlen = 0;
	vq->private_data = NULL;
	vq->log_base = NULL;
	vq->error_ctx = NULL;
	vq->error = NULL;
	vq->kick = NULL;
	vq->call_ctx = NULL;
	vq->call = NULL;
	vq->log_ctx = NULL;
	vq->upend_idx = 0;
	vq->done_idx = 0;
	vq->ubufs = NULL;
}

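/* The per-device worker thread: runs with the owner's mm and a userspace
 * address limit, executing queued work items and sleeping when the work
 * list is empty. Exits when kthread_stop() is called. */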
static int vhost_worker(void *data)
{
	struct vhost_dev *dev = data;
	struct vhost_work *work = NULL;
	unsigned uninitialized_var(seq);
	mm_segment_t oldfs = get_fs();

	set_fs(USER_DS);
	use_mm(dev->mm);

	for (;;) {
		/* mb paired w/ kthread_stop */
		set_current_state(TASK_INTERRUPTIBLE);

		spin_lock_irq(&dev->work_lock);
		if (work) {
			work->done_seq = seq;
			if (work->flushing)
				wake_up_all(&work->done);
		}

		if (kthread_should_stop()) {
			spin_unlock_irq(&dev->work_lock);
			__set_current_state(TASK_RUNNING);
			break;
		}
		if (!list_empty(&dev->work_list)) {
			work = list_first_entry(&dev->work_list,
						struct vhost_work, node);
			list_del_init(&work->node);
			seq = work->queue_seq;
		} else
			work = NULL;
		spin_unlock_irq(&dev->work_lock);

		if (work) {
			__set_current_state(TASK_RUNNING);
			work->fn(work);
			if (need_resched())
				schedule();
		} else
			schedule();

	}
	unuse_mm(dev->mm);
	set_fs(oldfs);
	return 0;
}

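/* Free the per-virtqueue indirect, log, heads and zerocopy buffers. */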
static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
{
	kfree(vq->indirect);
	vq->indirect = NULL;
	kfree(vq->log);
	vq->log = NULL;
	kfree(vq->heads);
	vq->heads = NULL;
	kfree(vq->ubuf_info);
	vq->ubuf_info = NULL;
}

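/* Mark a virtqueue index as using zerocopy transmission; checked when the
 * per-vq buffers are allocated in vhost_dev_alloc_iovecs(). */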
void vhost_enable_zcopy(int vq)
{
	vhost_zcopy_mask |= 0x1 << vq;
}

/* Helper to allocate iovec buffers for all vqs. */
static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
{
	int i;
	bool zcopy;

	for (i = 0; i < dev->nvqs; ++i) {
		dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect *
					       UIO_MAXIOV, GFP_KERNEL);
		dev->vqs[i].log = kmalloc(sizeof *dev->vqs[i].log * UIO_MAXIOV,
					  GFP_KERNEL);
		dev->vqs[i].heads = kmalloc(sizeof *dev->vqs[i].heads *
					    UIO_MAXIOV, GFP_KERNEL);
		zcopy = vhost_zcopy_mask & (0x1 << i);
		if (zcopy)
			dev->vqs[i].ubuf_info =
				kmalloc(sizeof *dev->vqs[i].ubuf_info *
					UIO_MAXIOV, GFP_KERNEL);
		if (!dev->vqs[i].indirect || !dev->vqs[i].log ||
			!dev->vqs[i].heads ||
			(zcopy && !dev->vqs[i].ubuf_info))
			goto err_nomem;
	}
	return 0;

err_nomem:
	for (; i >= 0; --i)
		vhost_vq_free_iovecs(&dev->vqs[i]);
	return -ENOMEM;
}

static void vhost_dev_free_iovecs(struct vhost_dev *dev)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i)
		vhost_vq_free_iovecs(&dev->vqs[i]);
}

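/* Initialize device state and each virtqueue; vqs with a kick handler get a
 * vhost_poll watching for POLLIN on their kick eventfd. The worker thread and
 * iovec buffers are created later, when an owner is set. */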
long vhost_dev_init(struct vhost_dev *dev,
		    struct vhost_virtqueue *vqs, int nvqs)
{
	int i;

	dev->vqs = vqs;
	dev->nvqs = nvqs;
	mutex_init(&dev->mutex);
	dev->log_ctx = NULL;
	dev->log_file = NULL;
	dev->memory = NULL;
	dev->mm = NULL;
	spin_lock_init(&dev->work_lock);
	INIT_LIST_HEAD(&dev->work_list);
	dev->worker = NULL;

	for (i = 0; i < dev->nvqs; ++i) {
		dev->vqs[i].log = NULL;
		dev->vqs[i].indirect = NULL;
		dev->vqs[i].heads = NULL;
		dev->vqs[i].ubuf_info = NULL;
		dev->vqs[i].dev = dev;
		mutex_init(&dev->vqs[i].mutex);
		vhost_vq_reset(dev, dev->vqs + i);
		if (dev->vqs[i].handle_kick)
			vhost_poll_init(&dev->vqs[i].poll,
					dev->vqs[i].handle_kick, POLLIN, dev);
	}

	return 0;
}

/* Caller should have device mutex */
long vhost_dev_check_owner(struct vhost_dev *dev)
{
	/* Are you the owner? If not, I don't think you mean to do that */
	return dev->mm == current->mm ? 0 : -EPERM;
}

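/* The cgroup attach runs as a work item on the worker thread, so that
 * cgroup_attach_task_all() moves the worker itself into the owner's cgroups;
 * vhost_attach_cgroups() queues the item and flushes it synchronously. */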
struct vhost_attach_cgroups_struct {
	struct vhost_work work;
	struct task_struct *owner;
	int ret;
};

static void vhost_attach_cgroups_work(struct vhost_work *work)
{
	struct vhost_attach_cgroups_struct *s;

	s = container_of(work, struct vhost_attach_cgroups_struct, work);
	s->ret = cgroup_attach_task_all(s->owner, current);
}

static int vhost_attach_cgroups(struct vhost_dev *dev)
{
	struct vhost_attach_cgroups_struct attach;

	attach.owner = current;
	vhost_work_init(&attach.work, vhost_attach_cgroups_work);
	vhost_work_queue(dev, &attach.work);
	vhost_work_flush(dev, &attach.work);
	return attach.ret;
}

/* Caller should have device mutex */
static long vhost_dev_set_owner(struct vhost_dev *dev)
{
	struct task_struct *worker;
	int err;

	/* Is there an owner already? */
	if (dev->mm) {
		err = -EBUSY;
		goto err_mm;
	}

	/* No owner, become one */
	dev->mm = get_task_mm(current);
	worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
	if (IS_ERR(worker)) {
		err = PTR_ERR(worker);
		goto err_worker;
	}

	dev->worker = worker;
	wake_up_process(worker);	/* avoid contributing to loadavg */

	err = vhost_attach_cgroups(dev);
	if (err)
		goto err_cgroup;

	err = vhost_dev_alloc_iovecs(dev);
	if (err)
		goto err_cgroup;

	return 0;
err_cgroup:
	kthread_stop(worker);
	dev->worker = NULL;
err_worker:
	if (dev->mm)
		mmput(dev->mm);
	dev->mm = NULL;
err_mm:
	return err;
}

/* Caller should have device mutex */
long vhost_dev_reset_owner(struct vhost_dev *dev)
{
	struct vhost_memory *memory;

	/* Restore memory to default empty mapping. */
	memory = kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
	if (!memory)
		return -ENOMEM;

	vhost_dev_cleanup(dev, true);

	memory->nregions = 0;
	RCU_INIT_POINTER(dev->memory, memory);
	return 0;
}

void vhost_dev_stop(struct vhost_dev *dev)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		if (dev->vqs[i].kick && dev->vqs[i].handle_kick) {
			vhost_poll_stop(&dev->vqs[i].poll);
			vhost_poll_flush(&dev->vqs[i].poll);
		}
	}
}

/* Caller should have device mutex if and only if locked is set */
void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		if (dev->vqs[i].error_ctx)
			eventfd_ctx_put(dev->vqs[i].error_ctx);
		if (dev->vqs[i].error)
			fput(dev->vqs[i].error);
		if (dev->vqs[i].kick)
			fput(dev->vqs[i].kick);
		if (dev->vqs[i].call_ctx)
			eventfd_ctx_put(dev->vqs[i].call_ctx);
		if (dev->vqs[i].call)
			fput(dev->vqs[i].call);
		vhost_vq_reset(dev, dev->vqs + i);
	}
	vhost_dev_free_iovecs(dev);
	if (dev->log_ctx)
		eventfd_ctx_put(dev->log_ctx);
	dev->log_ctx = NULL;
	if (dev->log_file)
		fput(dev->log_file);
	dev->log_file = NULL;
	/* No one will access memory at this point */
	kfree(rcu_dereference_protected(dev->memory,
					locked ==
						lockdep_is_held(&dev->mutex)));
	RCU_INIT_POINTER(dev->memory, NULL);
	WARN_ON(!list_empty(&dev->work_list));
	if (dev->worker) {
		kthread_stop(dev->worker);
		dev->worker = NULL;
	}
	if (dev->mm)
		mmput(dev->mm);
	dev->mm = NULL;
}

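/* Check that the part of the userspace log bitmap covering sz bytes of guest
 * memory starting at addr is writable, guarding against pointer overflow. */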
static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
{
	u64 a = addr / VHOST_PAGE_SIZE / 8;

	/* Make sure 64 bit math will not overflow. */
	if (a > ULONG_MAX - (unsigned long)log_base ||
	    a + (unsigned long)log_base > ULONG_MAX)
		return 0;

	return access_ok(VERIFY_WRITE, log_base + a,
			 (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8);
}

/* Caller should have vq mutex and device mutex. */
static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem,
			       int log_all)
{
	int i;

	if (!mem)
		return 0;

	for (i = 0; i < mem->nregions; ++i) {
		struct vhost_memory_region *m = mem->regions + i;
		unsigned long a = m->userspace_addr;
		if (m->memory_size > ULONG_MAX)
			return 0;
		else if (!access_ok(VERIFY_WRITE, (void __user *)a,
				    m->memory_size))
			return 0;
		else if (log_all && !log_access_ok(log_base,
						   m->guest_phys_addr,
						   m->memory_size))
			return 0;
	}
	return 1;
}

/* Can we switch to this memory table? */
/* Caller should have device mutex but not vq mutex */
static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
			    int log_all)
{
	int i;

	for (i = 0; i < d->nvqs; ++i) {
		int ok;
		mutex_lock(&d->vqs[i].mutex);
		/* If ring is inactive, will check when it's enabled. */
		if (d->vqs[i].private_data)
			ok = vq_memory_access_ok(d->vqs[i].log_base, mem,
						 log_all);
		else
			ok = 1;
		mutex_unlock(&d->vqs[i].mutex);
		if (!ok)
			return 0;
	}
	return 1;
}

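/* Verify that the descriptor, available and used rings of size num are
 * accessible, including the extra event index word when
 * VIRTIO_RING_F_EVENT_IDX is negotiated. */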
static int vq_access_ok(struct vhost_dev *d, unsigned int num,
			struct vring_desc __user *desc,
			struct vring_avail __user *avail,
			struct vring_used __user *used)
{
	size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
	return access_ok(VERIFY_READ, desc, num * sizeof *desc) &&
	       access_ok(VERIFY_READ, avail,
			 sizeof *avail + num * sizeof *avail->ring + s) &&
	       access_ok(VERIFY_WRITE, used,
			sizeof *used + num * sizeof *used->ring + s);
}

/* Can we log writes? */
/* Caller should have device mutex but not vq mutex */
int vhost_log_access_ok(struct vhost_dev *dev)
{
	struct vhost_memory *mp;

	mp = rcu_dereference_protected(dev->memory,
				       lockdep_is_held(&dev->mutex));
	return memory_access_ok(dev, mp, 1);
}

/* Verify access for write logging. */
/* Caller should have vq mutex and device mutex */
static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq,
			    void __user *log_base)
{
	struct vhost_memory *mp;
	size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;

	mp = rcu_dereference_protected(vq->dev->memory,
				       lockdep_is_held(&vq->mutex));
	return vq_memory_access_ok(log_base, mp,
			    vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) &&
		(!vq->log_used || log_access_ok(log_base, vq->log_addr,
					sizeof *vq->used +
					vq->num * sizeof *vq->used->ring + s));
}

/* Can we start vq? */
/* Caller should have vq mutex and device mutex */
int vhost_vq_access_ok(struct vhost_virtqueue *vq)
{
	return vq_access_ok(vq->dev, vq->num, vq->desc, vq->avail, vq->used) &&
		vq_log_access_ok(vq->dev, vq, vq->log_base);
}

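/* Install a new guest memory table supplied by userspace, after validating it
 * against every active virtqueue. The old table is freed only after an RCU
 * grace period, so concurrent readers such as translate_desc() stay safe. */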
static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
{
	struct vhost_memory mem, *newmem, *oldmem;
	unsigned long size = offsetof(struct vhost_memory, regions);

	if (copy_from_user(&mem, m, size))
		return -EFAULT;
	if (mem.padding)
		return -EOPNOTSUPP;
	if (mem.nregions > VHOST_MEMORY_MAX_NREGIONS)
		return -E2BIG;
	newmem = kmalloc(size + mem.nregions * sizeof *m->regions, GFP_KERNEL);
	if (!newmem)
		return -ENOMEM;

	memcpy(newmem, &mem, size);
	if (copy_from_user(newmem->regions, m->regions,
			   mem.nregions * sizeof *m->regions)) {
		kfree(newmem);
		return -EFAULT;
	}

	if (!memory_access_ok(d, newmem,
			      vhost_has_feature(d, VHOST_F_LOG_ALL))) {
		kfree(newmem);
		return -EFAULT;
	}
	oldmem = rcu_dereference_protected(d->memory,
					   lockdep_is_held(&d->mutex));
	rcu_assign_pointer(d->memory, newmem);
	synchronize_rcu();
	kfree(oldmem);
	return 0;
}

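/* Handle the per-virtqueue ioctls: ring size and base index, ring addresses,
 * and the kick/call/error eventfds. Polling on the kick eventfd is stopped
 * and restarted as needed when it is replaced. */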
long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
{
	struct file *eventfp, *filep = NULL;
	bool pollstart = false, pollstop = false;
	struct eventfd_ctx *ctx = NULL;
	u32 __user *idxp = argp;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	struct vhost_vring_file f;
	struct vhost_vring_addr a;
	u32 idx;
	long r;

	r = get_user(idx, idxp);
	if (r < 0)
		return r;
	if (idx >= d->nvqs)
		return -ENOBUFS;

	vq = d->vqs + idx;

	mutex_lock(&vq->mutex);

	switch (ioctl) {
	case VHOST_SET_VRING_NUM:
		/* Resizing ring with an active backend?
		 * You don't want to do that. */
		if (vq->private_data) {
			r = -EBUSY;
			break;
		}
		if (copy_from_user(&s, argp, sizeof s)) {
			r = -EFAULT;
			break;
		}
		if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) {
			r = -EINVAL;
			break;
		}
		vq->num = s.num;
		break;
	case VHOST_SET_VRING_BASE:
		/* Moving base with an active backend?
		 * You don't want to do that. */
		if (vq->private_data) {
			r = -EBUSY;
			break;
		}
		if (copy_from_user(&s, argp, sizeof s)) {
			r = -EFAULT;
			break;
		}
		if (s.num > 0xffff) {
			r = -EINVAL;
			break;
		}
		vq->last_avail_idx = s.num;
		/* Forget the cached index value. */
		vq->avail_idx = vq->last_avail_idx;
		break;
	case VHOST_GET_VRING_BASE:
		s.index = idx;
		s.num = vq->last_avail_idx;
		if (copy_to_user(argp, &s, sizeof s))
			r = -EFAULT;
		break;
	case VHOST_SET_VRING_ADDR:
		if (copy_from_user(&a, argp, sizeof a)) {
			r = -EFAULT;
			break;
		}
		if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) {
			r = -EOPNOTSUPP;
			break;
		}
		/* For 32bit, verify that the top 32bits of the user
		   data are set to zero. */
		if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr ||
		    (u64)(unsigned long)a.used_user_addr != a.used_user_addr ||
		    (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) {
			r = -EFAULT;
			break;
		}
		if ((a.avail_user_addr & (sizeof *vq->avail->ring - 1)) ||
		    (a.used_user_addr & (sizeof *vq->used->ring - 1)) ||
		    (a.log_guest_addr & (sizeof *vq->used->ring - 1))) {
			r = -EINVAL;
			break;
		}

		/* We only verify access here if backend is configured.
		 * If it is not, we don't as size might not have been setup.
		 * We will verify when backend is configured. */
		if (vq->private_data) {
			if (!vq_access_ok(d, vq->num,
				(void __user *)(unsigned long)a.desc_user_addr,
				(void __user *)(unsigned long)a.avail_user_addr,
				(void __user *)(unsigned long)a.used_user_addr)) {
				r = -EINVAL;
				break;
			}

			/* Also validate log access for used ring if enabled. */
			if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) &&
			    !log_access_ok(vq->log_base, a.log_guest_addr,
					   sizeof *vq->used +
					   vq->num * sizeof *vq->used->ring)) {
				r = -EINVAL;
				break;
			}
		}

		vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG));
		vq->desc = (void __user *)(unsigned long)a.desc_user_addr;
		vq->avail = (void __user *)(unsigned long)a.avail_user_addr;
		vq->log_addr = a.log_guest_addr;
		vq->used = (void __user *)(unsigned long)a.used_user_addr;
		break;
	case VHOST_SET_VRING_KICK:
		if (copy_from_user(&f, argp, sizeof f)) {
			r = -EFAULT;
			break;
		}
		eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
		if (IS_ERR(eventfp)) {
			r = PTR_ERR(eventfp);
			break;
		}
		if (eventfp != vq->kick) {
			pollstop = (filep = vq->kick) != NULL;
			pollstart = (vq->kick = eventfp) != NULL;
		} else
			filep = eventfp;
		break;
	case VHOST_SET_VRING_CALL:
		if (copy_from_user(&f, argp, sizeof f)) {
			r = -EFAULT;
			break;
		}
		eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
		if (IS_ERR(eventfp)) {
			r = PTR_ERR(eventfp);
			break;
		}
		if (eventfp != vq->call) {
			filep = vq->call;
			ctx = vq->call_ctx;
			vq->call = eventfp;
			vq->call_ctx = eventfp ?
				eventfd_ctx_fileget(eventfp) : NULL;
		} else
			filep = eventfp;
		break;
	case VHOST_SET_VRING_ERR:
		if (copy_from_user(&f, argp, sizeof f)) {
			r = -EFAULT;
			break;
		}
		eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
		if (IS_ERR(eventfp)) {
			r = PTR_ERR(eventfp);
			break;
		}
		if (eventfp != vq->error) {
			filep = vq->error;
			vq->error = eventfp;
			ctx = vq->error_ctx;
			vq->error_ctx = eventfp ?
				eventfd_ctx_fileget(eventfp) : NULL;
		} else
			filep = eventfp;
		break;
	default:
		r = -ENOIOCTLCMD;
	}

	if (pollstop && vq->handle_kick)
		vhost_poll_stop(&vq->poll);

	if (ctx)
		eventfd_ctx_put(ctx);
	if (filep)
		fput(filep);

	if (pollstart && vq->handle_kick)
		r = vhost_poll_start(&vq->poll, vq->kick);

	mutex_unlock(&vq->mutex);

	if (pollstop && vq->handle_kick)
		vhost_poll_flush(&vq->poll);
	return r;
}

/* Caller must have device mutex */
long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
{
	struct file *eventfp, *filep = NULL;
	struct eventfd_ctx *ctx = NULL;
	u64 p;
	long r;
	int i, fd;

	/* If you are not the owner, you can become one */
	if (ioctl == VHOST_SET_OWNER) {
		r = vhost_dev_set_owner(d);
		goto done;
	}

	/* You must be the owner to do anything else */
	r = vhost_dev_check_owner(d);
	if (r)
		goto done;

	switch (ioctl) {
	case VHOST_SET_MEM_TABLE:
		r = vhost_set_memory(d, argp);
		break;
	case VHOST_SET_LOG_BASE:
		if (copy_from_user(&p, argp, sizeof p)) {
			r = -EFAULT;
			break;
		}
		if ((u64)(unsigned long)p != p) {
			r = -EFAULT;
			break;
		}
		for (i = 0; i < d->nvqs; ++i) {
			struct vhost_virtqueue *vq;
			void __user *base = (void __user *)(unsigned long)p;
			vq = d->vqs + i;
			mutex_lock(&vq->mutex);
			/* If ring is inactive, will check when it's enabled. */
			if (vq->private_data && !vq_log_access_ok(d, vq, base))
				r = -EFAULT;
			else
				vq->log_base = base;
			mutex_unlock(&vq->mutex);
		}
		break;
	case VHOST_SET_LOG_FD:
		r = get_user(fd, (int __user *)argp);
		if (r < 0)
			break;
		eventfp = fd == -1 ? NULL : eventfd_fget(fd);
		if (IS_ERR(eventfp)) {
			r = PTR_ERR(eventfp);
			break;
		}
		if (eventfp != d->log_file) {
			filep = d->log_file;
			ctx = d->log_ctx;
			d->log_ctx = eventfp ?
				eventfd_ctx_fileget(eventfp) : NULL;
		} else
			filep = eventfp;
		for (i = 0; i < d->nvqs; ++i) {
			mutex_lock(&d->vqs[i].mutex);
			d->vqs[i].log_ctx = d->log_ctx;
			mutex_unlock(&d->vqs[i].mutex);
		}
		if (ctx)
			eventfd_ctx_put(ctx);
		if (filep)
			fput(filep);
		break;
	default:
		r = -ENOIOCTLCMD;
		break;
	}
done:
	return r;
}

static const struct vhost_memory_region *find_region(struct vhost_memory *mem,
						     __u64 addr, __u32 len)
{
	struct vhost_memory_region *reg;
	int i;

	/* linear search is not brilliant, but we really have on the order of 6
	 * regions in practice */
	for (i = 0; i < mem->nregions; ++i) {
		reg = mem->regions + i;
		if (reg->guest_phys_addr <= addr &&
		    reg->guest_phys_addr + reg->memory_size - 1 >= addr)
			return reg;
	}
	return NULL;
}

/* TODO: This is really inefficient.  We need something like get_user()
 * (instruction directly accesses the data, with an exception table entry
 * returning -EFAULT). See Documentation/x86/exception-tables.txt.
 */
static int set_bit_to_user(int nr, void __user *addr)
{
	unsigned long log = (unsigned long)addr;
	struct page *page;
	void *base;
	int bit = nr + (log % PAGE_SIZE) * 8;
	int r;

	r = get_user_pages_fast(log, 1, 1, &page);
	if (r < 0)
		return r;
	BUG_ON(r != 1);
	base = kmap_atomic(page);
	set_bit(bit, base);
	kunmap_atomic(base);
	set_page_dirty_lock(page);
	put_page(page);
	return 0;
}

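/* Set the dirty bits in the userspace log bitmap for a guest write of
 * write_length bytes at guest physical address write_address. */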
static int log_write(void __user *log_base,
		     u64 write_address, u64 write_length)
{
	u64 write_page = write_address / VHOST_PAGE_SIZE;
	int r;

	if (!write_length)
		return 0;
	write_length += write_address % VHOST_PAGE_SIZE;
	for (;;) {
		u64 base = (u64)(unsigned long)log_base;
		u64 log = base + write_page / 8;
		int bit = write_page % 8;
		if ((u64)(unsigned long)log != log)
			return -EFAULT;
		r = set_bit_to_user(bit, (void __user *)(unsigned long)log);
		if (r < 0)
			return r;
		if (write_length <= VHOST_PAGE_SIZE)
			break;
		write_length -= VHOST_PAGE_SIZE;
		write_page += 1;
	}
	return r;
}

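/* Log the address ranges recorded for a completed buffer, signalling the log
 * eventfd once len bytes have been accounted for. */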
int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
		    unsigned int log_num, u64 len)
{
	int i, r;

	/* Make sure data written is seen before log. */
	smp_wmb();
	for (i = 0; i < log_num; ++i) {
		u64 l = min(log[i].len, len);
		r = log_write(vq->log_base, log[i].addr, l);
		if (r < 0)
			return r;
		len -= l;
		if (!len) {
			if (vq->log_ctx)
				eventfd_signal(vq->log_ctx, 1);
			return 0;
		}
	}
	/* Length written exceeds what we have stored. This is a bug. */
	BUG();
	return 0;
}

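/* Push the cached used_flags value out to the guest-visible used ring and
 * log the write when dirty logging is enabled. */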
static int vhost_update_used_flags(struct vhost_virtqueue *vq)
{
	void __user *used;
	if (__put_user(vq->used_flags, &vq->used->flags) < 0)
		return -EFAULT;
	if (unlikely(vq->log_used)) {
		/* Make sure the flag is seen before log. */
		smp_wmb();
		/* Log used flag write. */
		used = &vq->used->flags;
		log_write(vq->log_base, vq->log_addr +
			  (used - (void __user *)vq->used),
			  sizeof vq->used->flags);
		if (vq->log_ctx)
			eventfd_signal(vq->log_ctx, 1);
	}
	return 0;
}

static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
{
	if (__put_user(vq->avail_idx, vhost_avail_event(vq)))
		return -EFAULT;
	if (unlikely(vq->log_used)) {
		void __user *used;
		/* Make sure the event is seen before log. */
		smp_wmb();
		/* Log avail event write */
		used = vhost_avail_event(vq);
		log_write(vq->log_base, vq->log_addr +
			  (used - (void __user *)vq->used),
			  sizeof *vhost_avail_event(vq));
		if (vq->log_ctx)
			eventfd_signal(vq->log_ctx, 1);
	}
	return 0;
}

 | 1025 | int vhost_init_used(struct vhost_virtqueue *vq) | 
 | 1026 | { | 
 | 1027 | 	int r; | 
 | 1028 | 	if (!vq->private_data) | 
 | 1029 | 		return 0; | 
 | 1030 |  | 
 | 1031 | 	r = vhost_update_used_flags(vq); | 
 | 1032 | 	if (r) | 
 | 1033 | 		return r; | 
 | 1034 | 	vq->signalled_used_valid = false; | 
 | 1035 | 	return get_user(vq->last_used_idx, &vq->used->idx); | 
 | 1036 | } | 
 | 1037 |  | 
| Christoph Hellwig | a8d3782 | 2010-04-13 14:11:25 -0400 | [diff] [blame] | 1038 | static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len, | 
 | 1039 | 			  struct iovec iov[], int iov_size) | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1040 | { | 
 | 1041 | 	const struct vhost_memory_region *reg; | 
 | 1042 | 	struct vhost_memory *mem; | 
 | 1043 | 	struct iovec *_iov; | 
 | 1044 | 	u64 s = 0; | 
 | 1045 | 	int ret = 0; | 
 | 1046 |  | 
 | 1047 | 	rcu_read_lock(); | 
 | 1048 |  | 
 | 1049 | 	mem = rcu_dereference(dev->memory); | 
 | 1050 | 	while ((u64)len > s) { | 
 | 1051 | 		u64 size; | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1052 | 		if (unlikely(ret >= iov_size)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1053 | 			ret = -ENOBUFS; | 
 | 1054 | 			break; | 
 | 1055 | 		} | 
 | 1056 | 		reg = find_region(mem, addr, len); | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1057 | 		if (unlikely(!reg)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1058 | 			ret = -EFAULT; | 
 | 1059 | 			break; | 
 | 1060 | 		} | 
 | 1061 | 		_iov = iov + ret; | 
 | 1062 | 		size = reg->memory_size - addr + reg->guest_phys_addr; | 
| Michael S. Tsirkin | bd97120 | 2012-11-26 05:57:27 +0000 | [diff] [blame] | 1063 | 		_iov->iov_len = min((u64)len - s, size); | 
| Christoph Hellwig | a8d3782 | 2010-04-13 14:11:25 -0400 | [diff] [blame] | 1064 | 		_iov->iov_base = (void __user *)(unsigned long) | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1065 | 			(reg->userspace_addr + addr - reg->guest_phys_addr); | 
 | 1066 | 		s += size; | 
 | 1067 | 		addr += size; | 
 | 1068 | 		++ret; | 
 | 1069 | 	} | 
 | 1070 |  | 
 | 1071 | 	rcu_read_unlock(); | 
 | 1072 | 	return ret; | 
 | 1073 | } | 
 | 1074 |  | 
 | 1075 | /* Each buffer in the virtqueues is actually a chain of descriptors.  This | 
 | 1076 |  * function returns the next descriptor in the chain, | 
 | 1077 |  * or -1U if we're at the end. */ | 
 | 1078 | static unsigned next_desc(struct vring_desc *desc) | 
 | 1079 | { | 
 | 1080 | 	unsigned int next; | 
 | 1081 |  | 
 | 1082 | 	/* If this descriptor says it doesn't chain, we're done. */ | 
 | 1083 | 	if (!(desc->flags & VRING_DESC_F_NEXT)) | 
 | 1084 | 		return -1U; | 
 | 1085 |  | 
 | 1086 | 	/* Check they're not leading us off the end of the descriptors. */ | 
 | 1087 | 	next = desc->next; | 
 | 1088 | 	/* Make sure compiler knows to grab that: we don't want it changing! */ | 
 | 1089 | 	/* We will use the result as an index in an array, so most | 
 | 1090 | 	 * architectures only need a compiler barrier here. */ | 
 | 1091 | 	read_barrier_depends(); | 
 | 1092 |  | 
 | 1093 | 	return next; | 
 | 1094 | } | 
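 |  |  | 
 |  | /* Chain-walk sketch with illustrative values: given desc[0] with | 
 |  |  * VRING_DESC_F_NEXT set and desc[0].next = 3, next_desc(&desc[0]) returns 3; | 
 |  |  * if desc[3] does not have VRING_DESC_F_NEXT set, next_desc(&desc[3]) | 
 |  |  * returns -1U and the chain ends there. */ | 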
 | 1095 |  | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1096 | static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, | 
 | 1097 | 			struct iovec iov[], unsigned int iov_size, | 
 | 1098 | 			unsigned int *out_num, unsigned int *in_num, | 
 | 1099 | 			struct vhost_log *log, unsigned int *log_num, | 
 | 1100 | 			struct vring_desc *indirect) | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1101 | { | 
 | 1102 | 	struct vring_desc desc; | 
 | 1103 | 	unsigned int i = 0, count, found = 0; | 
 | 1104 | 	int ret; | 
 | 1105 |  | 
 | 1106 | 	/* Sanity check */ | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1107 | 	if (unlikely(indirect->len % sizeof desc)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1108 | 		vq_err(vq, "Invalid length in indirect descriptor: " | 
 | 1109 | 		       "len 0x%llx not multiple of 0x%zx\n", | 
 | 1110 | 		       (unsigned long long)indirect->len, | 
 | 1111 | 		       sizeof desc); | 
 | 1112 | 		return -EINVAL; | 
 | 1113 | 	} | 
 | 1114 |  | 
 | 1115 | 	ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect, | 
| Jason Wang | e0e9b40 | 2010-09-14 23:53:05 +0800 | [diff] [blame] | 1116 | 			     UIO_MAXIOV); | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1117 | 	if (unlikely(ret < 0)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1118 | 		vq_err(vq, "Translation failure %d in indirect.\n", ret); | 
 | 1119 | 		return ret; | 
 | 1120 | 	} | 
 | 1121 |  | 
 | 1122 | 	/* We will use the result as an address to read from, so most | 
 | 1123 | 	 * architectures only need a compiler barrier here. */ | 
 | 1124 | 	read_barrier_depends(); | 
 | 1125 |  | 
 | 1126 | 	count = indirect->len / sizeof desc; | 
 | 1127 | 	/* Buffers are chained via a 16 bit next field, so | 
 | 1128 | 	 * we can have at most 2^16 of these. */ | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1129 | 	if (unlikely(count > USHRT_MAX + 1)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1130 | 		vq_err(vq, "Indirect buffer length too big: %d\n", | 
 | 1131 | 		       indirect->len); | 
 | 1132 | 		return -E2BIG; | 
 | 1133 | 	} | 
 | 1134 |  | 
 | 1135 | 	do { | 
 | 1136 | 		unsigned iov_count = *in_num + *out_num; | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1137 | 		if (unlikely(++found > count)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1138 | 			vq_err(vq, "Loop detected: last one at %u " | 
 | 1139 | 			       "indirect size %u\n", | 
 | 1140 | 			       i, count); | 
 | 1141 | 			return -EINVAL; | 
 | 1142 | 		} | 
| Krishna Kumar | d47effe | 2011-03-01 17:06:37 +0530 | [diff] [blame] | 1143 | 		if (unlikely(memcpy_fromiovec((unsigned char *)&desc, | 
 | 1144 | 					      vq->indirect, sizeof desc))) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1145 | 			vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", | 
 | 1146 | 			       i, (size_t)indirect->addr + i * sizeof desc); | 
 | 1147 | 			return -EINVAL; | 
 | 1148 | 		} | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1149 | 		if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1150 | 			vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n", | 
 | 1151 | 			       i, (size_t)indirect->addr + i * sizeof desc); | 
 | 1152 | 			return -EINVAL; | 
 | 1153 | 		} | 
 | 1154 |  | 
 | 1155 | 		ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count, | 
 | 1156 | 				     iov_size - iov_count); | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1157 | 		if (unlikely(ret < 0)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1158 | 			vq_err(vq, "Translation failure %d indirect idx %d\n", | 
 | 1159 | 			       ret, i); | 
 | 1160 | 			return ret; | 
 | 1161 | 		} | 
 | 1162 | 		/* If this is an input descriptor, increment that count. */ | 
 | 1163 | 		if (desc.flags & VRING_DESC_F_WRITE) { | 
 | 1164 | 			*in_num += ret; | 
 | 1165 | 			if (unlikely(log)) { | 
 | 1166 | 				log[*log_num].addr = desc.addr; | 
 | 1167 | 				log[*log_num].len = desc.len; | 
 | 1168 | 				++*log_num; | 
 | 1169 | 			} | 
 | 1170 | 		} else { | 
 | 1171 | 			/* If it's an output descriptor, they're all supposed | 
 | 1172 | 			 * to come before any input descriptors. */ | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1173 | 			if (unlikely(*in_num)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1174 | 				vq_err(vq, "Indirect descriptor " | 
 | 1175 | 				       "has out after in: idx %d\n", i); | 
 | 1176 | 				return -EINVAL; | 
 | 1177 | 			} | 
 | 1178 | 			*out_num += ret; | 
 | 1179 | 		} | 
 | 1180 | 	} while ((i = next_desc(&desc)) != -1); | 
 | 1181 | 	return 0; | 
 | 1182 | } | 
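 |  |  | 
 |  | /* Indirect-table sketch (illustrative sizes): a descriptor with | 
 |  |  * VRING_DESC_F_INDIRECT set and len == 3 * sizeof(struct vring_desc) points | 
 |  |  * at a guest buffer holding a three-entry descriptor table.  get_indirect() | 
 |  |  * translates that buffer into vq->indirect, then walks the table like a | 
 |  |  * normal chain, appending output entries and then input entries to iov[] | 
 |  |  * and bumping *out_num / *in_num (and log[] when dirty logging is on). */ | 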
 | 1183 |  | 
 | 1184 | /* This looks in the virtqueue for the first available buffer, and converts | 
 | 1185 |  * it to an iovec for convenient access.  Since descriptors consist of some | 
 | 1186 |  * number of output then some number of input descriptors, it's actually two | 
 | 1187 |  * iovecs, but we pack them into one and note how many of each there were. | 
 | 1188 |  * | 
| Michael S. Tsirkin | d5675bd | 2010-06-24 16:59:59 +0300 | [diff] [blame] | 1189 |  * This function returns the descriptor number found, or vq->num (which is | 
 | 1190 |  * never a valid descriptor number) if none was found.  A negative code is | 
 | 1191 |  * returned on error. */ | 
 | 1192 | int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, | 
 | 1193 | 		      struct iovec iov[], unsigned int iov_size, | 
 | 1194 | 		      unsigned int *out_num, unsigned int *in_num, | 
 | 1195 | 		      struct vhost_log *log, unsigned int *log_num) | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1196 | { | 
 | 1197 | 	struct vring_desc desc; | 
 | 1198 | 	unsigned int i, head, found = 0; | 
 | 1199 | 	u16 last_avail_idx; | 
 | 1200 | 	int ret; | 
 | 1201 |  | 
 | 1202 | 	/* Check it isn't doing very strange things with descriptor numbers. */ | 
 | 1203 | 	last_avail_idx = vq->last_avail_idx; | 
| Michael S. Tsirkin | 8b7347a | 2010-09-19 15:56:30 +0200 | [diff] [blame] | 1204 | 	if (unlikely(__get_user(vq->avail_idx, &vq->avail->idx))) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1205 | 		vq_err(vq, "Failed to access avail idx at %p\n", | 
 | 1206 | 		       &vq->avail->idx); | 
| Michael S. Tsirkin | d5675bd | 2010-06-24 16:59:59 +0300 | [diff] [blame] | 1207 | 		return -EFAULT; | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1208 | 	} | 
 | 1209 |  | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1210 | 	if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1211 | 		vq_err(vq, "Guest moved used index from %u to %u", | 
 | 1212 | 		       last_avail_idx, vq->avail_idx); | 
| Michael S. Tsirkin | d5675bd | 2010-06-24 16:59:59 +0300 | [diff] [blame] | 1213 | 		return -EFAULT; | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1214 | 	} | 
 | 1215 |  | 
 | 1216 | 	/* If there's nothing new since last we looked, return invalid. */ | 
 | 1217 | 	if (vq->avail_idx == last_avail_idx) | 
 | 1218 | 		return vq->num; | 
 | 1219 |  | 
 | 1220 | 	/* Only get avail ring entries after they have been exposed by guest. */ | 
| Michael S. Tsirkin | 5659338 | 2010-02-01 07:21:02 +0000 | [diff] [blame] | 1221 | 	smp_rmb(); | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1222 |  | 
 | 1223 | 	/* Grab the next descriptor number they're advertising, and increment | 
 | 1224 | 	 * the index we've seen. */ | 
| Michael S. Tsirkin | 8b7347a | 2010-09-19 15:56:30 +0200 | [diff] [blame] | 1225 | 	if (unlikely(__get_user(head, | 
 | 1226 | 				&vq->avail->ring[last_avail_idx % vq->num]))) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1227 | 		vq_err(vq, "Failed to read head: idx %d address %p\n", | 
 | 1228 | 		       last_avail_idx, | 
 | 1229 | 		       &vq->avail->ring[last_avail_idx % vq->num]); | 
| Michael S. Tsirkin | d5675bd | 2010-06-24 16:59:59 +0300 | [diff] [blame] | 1230 | 		return -EFAULT; | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1231 | 	} | 
 | 1232 |  | 
 | 1233 | 	/* If their number is silly, that's an error. */ | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1234 | 	if (unlikely(head >= vq->num)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1235 | 		vq_err(vq, "Guest says index %u > %u is available", | 
 | 1236 | 		       head, vq->num); | 
| Michael S. Tsirkin | d5675bd | 2010-06-24 16:59:59 +0300 | [diff] [blame] | 1237 | 		return -EINVAL; | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1238 | 	} | 
 | 1239 |  | 
 | 1240 | 	/* When we start there are neither input nor output descriptors. */ | 
 | 1241 | 	*out_num = *in_num = 0; | 
 | 1242 | 	if (unlikely(log)) | 
 | 1243 | 		*log_num = 0; | 
 | 1244 |  | 
 | 1245 | 	i = head; | 
 | 1246 | 	do { | 
 | 1247 | 		unsigned iov_count = *in_num + *out_num; | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1248 | 		if (unlikely(i >= vq->num)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1249 | 			vq_err(vq, "Desc index is %u > %u, head = %u", | 
 | 1250 | 			       i, vq->num, head); | 
| Michael S. Tsirkin | d5675bd | 2010-06-24 16:59:59 +0300 | [diff] [blame] | 1251 | 			return -EINVAL; | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1252 | 		} | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1253 | 		if (unlikely(++found > vq->num)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1254 | 			vq_err(vq, "Loop detected: last one at %u " | 
 | 1255 | 			       "vq size %u head %u\n", | 
 | 1256 | 			       i, vq->num, head); | 
| Michael S. Tsirkin | d5675bd | 2010-06-24 16:59:59 +0300 | [diff] [blame] | 1257 | 			return -EINVAL; | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1258 | 		} | 
| Michael S. Tsirkin | fcc042a | 2011-03-06 13:33:49 +0200 | [diff] [blame] | 1259 | 		ret = __copy_from_user(&desc, vq->desc + i, sizeof desc); | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1260 | 		if (unlikely(ret)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1261 | 			vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", | 
 | 1262 | 			       i, vq->desc + i); | 
| Michael S. Tsirkin | d5675bd | 2010-06-24 16:59:59 +0300 | [diff] [blame] | 1263 | 			return -EFAULT; | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1264 | 		} | 
 | 1265 | 		if (desc.flags & VRING_DESC_F_INDIRECT) { | 
 | 1266 | 			ret = get_indirect(dev, vq, iov, iov_size, | 
 | 1267 | 					   out_num, in_num, | 
 | 1268 | 					   log, log_num, &desc); | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1269 | 			if (unlikely(ret < 0)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1270 | 				vq_err(vq, "Failure detected " | 
 | 1271 | 				       "in indirect descriptor at idx %d\n", i); | 
| Michael S. Tsirkin | d5675bd | 2010-06-24 16:59:59 +0300 | [diff] [blame] | 1272 | 				return ret; | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1273 | 			} | 
 | 1274 | 			continue; | 
 | 1275 | 		} | 
 | 1276 |  | 
 | 1277 | 		ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count, | 
 | 1278 | 				     iov_size - iov_count); | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1279 | 		if (unlikely(ret < 0)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1280 | 			vq_err(vq, "Translation failure %d descriptor idx %d\n", | 
 | 1281 | 			       ret, i); | 
| Michael S. Tsirkin | d5675bd | 2010-06-24 16:59:59 +0300 | [diff] [blame] | 1282 | 			return ret; | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1283 | 		} | 
 | 1284 | 		if (desc.flags & VRING_DESC_F_WRITE) { | 
 | 1285 | 			/* If this is an input descriptor, | 
 | 1286 | 			 * increment that count. */ | 
 | 1287 | 			*in_num += ret; | 
 | 1288 | 			if (unlikely(log)) { | 
 | 1289 | 				log[*log_num].addr = desc.addr; | 
 | 1290 | 				log[*log_num].len = desc.len; | 
 | 1291 | 				++*log_num; | 
 | 1292 | 			} | 
 | 1293 | 		} else { | 
 | 1294 | 			/* If it's an output descriptor, they're all supposed | 
 | 1295 | 			 * to come before any input descriptors. */ | 
| Michael S. Tsirkin | 7b3384f | 2010-07-01 18:40:12 +0300 | [diff] [blame] | 1296 | 			if (unlikely(*in_num)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1297 | 				vq_err(vq, "Descriptor has out after in: " | 
 | 1298 | 				       "idx %d\n", i); | 
| Michael S. Tsirkin | d5675bd | 2010-06-24 16:59:59 +0300 | [diff] [blame] | 1299 | 				return -EINVAL; | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1300 | 			} | 
 | 1301 | 			*out_num += ret; | 
 | 1302 | 		} | 
 | 1303 | 	} while ((i = next_desc(&desc)) != -1); | 
 | 1304 |  | 
 | 1305 | 	/* On success, increment avail index. */ | 
 | 1306 | 	vq->last_avail_idx++; | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1307 |  | 
 | 1308 | 	/* Assume notifications from the guest are disabled at this point; | 
 | 1309 | 	 * if they aren't, we would need to update the avail_event index. */ | 
 | 1310 | 	BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1311 | 	return head; | 
 | 1312 | } | 
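 |  |  | 
 |  | /* Usage sketch (hedged; not lifted from any particular backend).  A handler | 
 |  |  * holding vq->mutex might pull one buffer like this, where 'out', 'in' and | 
 |  |  * 'head' are the caller's locals and iov/iov_size is whatever scatter-gather | 
 |  |  * array it passes in: | 
 |  |  * | 
 |  |  *	head = vhost_get_vq_desc(dev, vq, iov, iov_size, &out, &in, NULL, NULL); | 
 |  |  *	if (head < 0) | 
 |  |  *		return;		// access error, already reported via vq_err() | 
 |  |  *	if (head == vq->num) | 
 |  |  *		return;		// nothing available right now | 
 |  |  *	// iov[0..out) is guest-readable data, iov[out..out + in) is writable; | 
 |  |  *	// 'head' is handed back later via vhost_add_used(vq, head, written). | 
 |  |  */ | 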
 | 1313 |  | 
 | 1314 | /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ | 
| David Stevens | 8dd014a | 2010-07-27 18:52:21 +0300 | [diff] [blame] | 1315 | void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n) | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1316 | { | 
| David Stevens | 8dd014a | 2010-07-27 18:52:21 +0300 | [diff] [blame] | 1317 | 	vq->last_avail_idx -= n; | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1318 | } | 
 | 1319 |  | 
 | 1320 | /* After we've used one of their buffers, we tell them about it.  We'll then | 
 | 1321 |  * want to notify the guest, using eventfd. */ | 
 | 1322 | int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) | 
 | 1323 | { | 
| Christoph Hellwig | a8d3782 | 2010-04-13 14:11:25 -0400 | [diff] [blame] | 1324 | 	struct vring_used_elem __user *used; | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1325 |  | 
 | 1326 | 	/* The virtqueue contains a ring of used buffers.  Get a pointer to the | 
 | 1327 | 	 * next entry in that used ring. */ | 
 | 1328 | 	used = &vq->used->ring[vq->last_used_idx % vq->num]; | 
| Michael S. Tsirkin | 8b7347a | 2010-09-19 15:56:30 +0200 | [diff] [blame] | 1329 | 	if (__put_user(head, &used->id)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1330 | 		vq_err(vq, "Failed to write used id"); | 
 | 1331 | 		return -EFAULT; | 
 | 1332 | 	} | 
| Michael S. Tsirkin | 8b7347a | 2010-09-19 15:56:30 +0200 | [diff] [blame] | 1333 | 	if (__put_user(len, &used->len)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1334 | 		vq_err(vq, "Failed to write used len"); | 
 | 1335 | 		return -EFAULT; | 
 | 1336 | 	} | 
 | 1337 | 	/* Make sure buffer is written before we update index. */ | 
| Michael S. Tsirkin | 5659338 | 2010-02-01 07:21:02 +0000 | [diff] [blame] | 1338 | 	smp_wmb(); | 
| Michael S. Tsirkin | 8b7347a | 2010-09-19 15:56:30 +0200 | [diff] [blame] | 1339 | 	if (__put_user(vq->last_used_idx + 1, &vq->used->idx)) { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1340 | 		vq_err(vq, "Failed to increment used idx"); | 
 | 1341 | 		return -EFAULT; | 
 | 1342 | 	} | 
 | 1343 | 	if (unlikely(vq->log_used)) { | 
 | 1344 | 		/* Make sure data is seen before log. */ | 
| Michael S. Tsirkin | 5659338 | 2010-02-01 07:21:02 +0000 | [diff] [blame] | 1345 | 		smp_wmb(); | 
| Michael S. Tsirkin | 86e9424 | 2010-02-17 19:11:33 +0200 | [diff] [blame] | 1346 | 		/* Log used ring entry write. */ | 
 | 1347 | 		log_write(vq->log_base, | 
| Christoph Hellwig | a8d3782 | 2010-04-13 14:11:25 -0400 | [diff] [blame] | 1348 | 			  vq->log_addr + | 
 | 1349 | 			   ((void __user *)used - (void __user *)vq->used), | 
| Michael S. Tsirkin | 86e9424 | 2010-02-17 19:11:33 +0200 | [diff] [blame] | 1350 | 			  sizeof *used); | 
 | 1351 | 		/* Log used index update. */ | 
 | 1352 | 		log_write(vq->log_base, | 
 | 1353 | 			  vq->log_addr + offsetof(struct vring_used, idx), | 
 | 1354 | 			  sizeof vq->used->idx); | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1355 | 		if (vq->log_ctx) | 
 | 1356 | 			eventfd_signal(vq->log_ctx, 1); | 
 | 1357 | 	} | 
 | 1358 | 	vq->last_used_idx++; | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1359 | 	/* If the driver never bothers to signal in a very long while, | 
 | 1360 | 	 * used index might wrap around. If that happens, invalidate | 
 | 1361 | 	 * signalled_used index we stored. TODO: make sure driver | 
 | 1362 | 	 * signals at least once in 2^16 and remove this. */ | 
 | 1363 | 	if (unlikely(vq->last_used_idx == vq->signalled_used)) | 
 | 1364 | 		vq->signalled_used_valid = false; | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1365 | 	return 0; | 
 | 1366 | } | 
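 |  |  | 
 |  | /* Effect sketch with illustrative numbers: if vq->num == 256 and | 
 |  |  * vq->last_used_idx == 258, the element lands in used->ring[2] as | 
 |  |  * { .id = head, .len = len } and used->idx is bumped to 259.  The guest only | 
 |  |  * trusts ring[2] once it observes the new idx, which is what the smp_wmb() | 
 |  |  * between the two writes above guarantees. */ | 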
 | 1367 |  | 
| David Stevens | 8dd014a | 2010-07-27 18:52:21 +0300 | [diff] [blame] | 1368 | static int __vhost_add_used_n(struct vhost_virtqueue *vq, | 
 | 1369 | 			    struct vring_used_elem *heads, | 
 | 1370 | 			    unsigned count) | 
 | 1371 | { | 
 | 1372 | 	struct vring_used_elem __user *used; | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1373 | 	u16 old, new; | 
| David Stevens | 8dd014a | 2010-07-27 18:52:21 +0300 | [diff] [blame] | 1374 | 	int start; | 
 | 1375 |  | 
 | 1376 | 	start = vq->last_used_idx % vq->num; | 
 | 1377 | 	used = vq->used->ring + start; | 
| Michael S. Tsirkin | dfe5ac5 | 2010-09-21 14:18:01 +0200 | [diff] [blame] | 1378 | 	if (__copy_to_user(used, heads, count * sizeof *used)) { | 
| David Stevens | 8dd014a | 2010-07-27 18:52:21 +0300 | [diff] [blame] | 1379 | 		vq_err(vq, "Failed to write used"); | 
 | 1380 | 		return -EFAULT; | 
 | 1381 | 	} | 
 | 1382 | 	if (unlikely(vq->log_used)) { | 
 | 1383 | 		/* Make sure data is seen before log. */ | 
 | 1384 | 		smp_wmb(); | 
 | 1385 | 		/* Log used ring entry write. */ | 
 | 1386 | 		log_write(vq->log_base, | 
 | 1387 | 			  vq->log_addr + | 
 | 1388 | 			   ((void __user *)used - (void __user *)vq->used), | 
 | 1389 | 			  count * sizeof *used); | 
 | 1390 | 	} | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1391 | 	old = vq->last_used_idx; | 
 | 1392 | 	new = (vq->last_used_idx += count); | 
 | 1393 | 	/* If the driver never bothers to signal in a very long while, | 
 | 1394 | 	 * used index might wrap around. If that happens, invalidate | 
 | 1395 | 	 * signalled_used index we stored. TODO: make sure driver | 
 | 1396 | 	 * signals at least once in 2^16 and remove this. */ | 
 | 1397 | 	if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old))) | 
 | 1398 | 		vq->signalled_used_valid = false; | 
| David Stevens | 8dd014a | 2010-07-27 18:52:21 +0300 | [diff] [blame] | 1399 | 	return 0; | 
 | 1400 | } | 
 | 1401 |  | 
 | 1402 | /* After we've used one of their buffers, we tell them about it.  We'll then | 
 | 1403 |  * want to notify the guest, using eventfd. */ | 
 | 1404 | int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, | 
 | 1405 | 		     unsigned count) | 
 | 1406 | { | 
 | 1407 | 	int start, n, r; | 
 | 1408 |  | 
 | 1409 | 	start = vq->last_used_idx % vq->num; | 
 | 1410 | 	n = vq->num - start; | 
 | 1411 | 	if (n < count) { | 
 | 1412 | 		r = __vhost_add_used_n(vq, heads, n); | 
 | 1413 | 		if (r < 0) | 
 | 1414 | 			return r; | 
 | 1415 | 		heads += n; | 
 | 1416 | 		count -= n; | 
 | 1417 | 	} | 
 | 1418 | 	r = __vhost_add_used_n(vq, heads, count); | 
 | 1419 |  | 
 | 1420 | 	/* Make sure buffer is written before we update index. */ | 
 | 1421 | 	smp_wmb(); | 
 | 1422 | 	if (put_user(vq->last_used_idx, &vq->used->idx)) { | 
 | 1423 | 		vq_err(vq, "Failed to increment used idx"); | 
 | 1424 | 		return -EFAULT; | 
 | 1425 | 	} | 
 | 1426 | 	if (unlikely(vq->log_used)) { | 
 | 1427 | 		/* Log used index update. */ | 
 | 1428 | 		log_write(vq->log_base, | 
 | 1429 | 			  vq->log_addr + offsetof(struct vring_used, idx), | 
 | 1430 | 			  sizeof vq->used->idx); | 
 | 1431 | 		if (vq->log_ctx) | 
 | 1432 | 			eventfd_signal(vq->log_ctx, 1); | 
 | 1433 | 	} | 
 | 1434 | 	return r; | 
 | 1435 | } | 
 | 1436 |  | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1437 | static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1438 | { | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1439 | 	__u16 old, new, event; | 
 | 1440 | 	bool v; | 
| Michael S. Tsirkin | 0d49935 | 2010-05-11 19:44:17 +0300 | [diff] [blame] | 1441 | 	/* Flush out used index updates. This is paired | 
 | 1442 | 	 * with the barrier that the Guest executes when enabling | 
 | 1443 | 	 * interrupts. */ | 
 | 1444 | 	smp_mb(); | 
 | 1445 |  | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1446 | 	if (vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) && | 
 | 1447 | 	    unlikely(vq->avail_idx == vq->last_avail_idx)) | 
 | 1448 | 		return true; | 
 | 1449 |  | 
 | 1450 | 	if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { | 
 | 1451 | 		__u16 flags; | 
 | 1452 | 		if (__get_user(flags, &vq->avail->flags)) { | 
 | 1453 | 			vq_err(vq, "Failed to get flags"); | 
 | 1454 | 			return true; | 
 | 1455 | 		} | 
 | 1456 | 		return !(flags & VRING_AVAIL_F_NO_INTERRUPT); | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1457 | 	} | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1458 | 	old = vq->signalled_used; | 
 | 1459 | 	v = vq->signalled_used_valid; | 
 | 1460 | 	new = vq->signalled_used = vq->last_used_idx; | 
 | 1461 | 	vq->signalled_used_valid = true; | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1462 |  | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1463 | 	if (unlikely(!v)) | 
 | 1464 | 		return true; | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1465 |  | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1466 | 	if (get_user(event, vhost_used_event(vq))) { | 
 | 1467 | 		vq_err(vq, "Failed to get used event idx"); | 
 | 1468 | 		return true; | 
 | 1469 | 	} | 
 | 1470 | 	return vring_need_event(event, new, old); | 
 | 1471 | } | 
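 |  |  | 
 |  | /* Event-index sketch: vring_need_event(event, new, old) from | 
 |  |  * linux/virtio_ring.h is roughly (u16)(new - event - 1) < (u16)(new - old). | 
 |  |  * With old == 10, new == 13 and a guest used_event of 11 that is 1 < 3, so | 
 |  |  * we signal; with used_event == 20 the left-hand side wraps to 65528 and we | 
 |  |  * skip the interrupt until the guest's threshold is actually crossed. */ | 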
 | 1472 |  | 
 | 1473 | /* This actually signals the guest, using eventfd. */ | 
 | 1474 | void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) | 
 | 1475 | { | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1476 | 	/* Signal the Guest to tell them we used something up. */ | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1477 | 	if (vq->call_ctx && vhost_notify(dev, vq)) | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1478 | 		eventfd_signal(vq->call_ctx, 1); | 
 | 1479 | } | 
 | 1480 |  | 
 | 1481 | /* And here's the combo meal deal.  Supersize me! */ | 
 | 1482 | void vhost_add_used_and_signal(struct vhost_dev *dev, | 
 | 1483 | 			       struct vhost_virtqueue *vq, | 
 | 1484 | 			       unsigned int head, int len) | 
 | 1485 | { | 
 | 1486 | 	vhost_add_used(vq, head, len); | 
 | 1487 | 	vhost_signal(dev, vq); | 
 | 1488 | } | 
 | 1489 |  | 
| David Stevens | 8dd014a | 2010-07-27 18:52:21 +0300 | [diff] [blame] | 1490 | /* multi-buffer version of vhost_add_used_and_signal */ | 
 | 1491 | void vhost_add_used_and_signal_n(struct vhost_dev *dev, | 
 | 1492 | 				 struct vhost_virtqueue *vq, | 
 | 1493 | 				 struct vring_used_elem *heads, unsigned count) | 
 | 1494 | { | 
 | 1495 | 	vhost_add_used_n(vq, heads, count); | 
 | 1496 | 	vhost_signal(dev, vq); | 
 | 1497 | } | 
 | 1498 |  | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1499 | /* OK, now we need to know about added descriptors. */ | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1500 | bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1501 | { | 
 | 1502 | 	u16 avail_idx; | 
 | 1503 | 	int r; | 
| Krishna Kumar | d47effe | 2011-03-01 17:06:37 +0530 | [diff] [blame] | 1504 |  | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1505 | 	if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) | 
 | 1506 | 		return false; | 
 | 1507 | 	vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1508 | 	if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { | 
| Jason Wang | 2723fea | 2011-06-21 18:04:38 +0800 | [diff] [blame] | 1509 | 		r = vhost_update_used_flags(vq); | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1510 | 		if (r) { | 
 | 1511 | 			vq_err(vq, "Failed to enable notification at %p: %d\n", | 
 | 1512 | 			       &vq->used->flags, r); | 
 | 1513 | 			return false; | 
 | 1514 | 		} | 
 | 1515 | 	} else { | 
| Jason Wang | 2723fea | 2011-06-21 18:04:38 +0800 | [diff] [blame] | 1516 | 		r = vhost_update_avail_event(vq, vq->avail_idx); | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1517 | 		if (r) { | 
 | 1518 | 			vq_err(vq, "Failed to update avail event index at %p: %d\n", | 
 | 1519 | 			       vhost_avail_event(vq), r); | 
 | 1520 | 			return false; | 
 | 1521 | 		} | 
 | 1522 | 	} | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1523 | 	/* They could have slipped one in as we were doing that: make | 
 | 1524 | 	 * sure it's written, then check again. */ | 
| Michael S. Tsirkin | 5659338 | 2010-02-01 07:21:02 +0000 | [diff] [blame] | 1525 | 	smp_mb(); | 
| Michael S. Tsirkin | 8b7347a | 2010-09-19 15:56:30 +0200 | [diff] [blame] | 1526 | 	r = __get_user(avail_idx, &vq->avail->idx); | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1527 | 	if (r) { | 
 | 1528 | 		vq_err(vq, "Failed to check avail idx at %p: %d\n", | 
 | 1529 | 		       &vq->avail->idx, r); | 
 | 1530 | 		return false; | 
 | 1531 | 	} | 
 | 1532 |  | 
| David Stevens | 8dd014a | 2010-07-27 18:52:21 +0300 | [diff] [blame] | 1533 | 	return avail_idx != vq->avail_idx; | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1534 | } | 
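 |  |  | 
 |  | /* Polling-pattern sketch (hedged; backends differ in detail): a handler | 
 |  |  * usually runs with notifications masked and only re-arms them once the | 
 |  |  * ring looks empty, rechecking for the race this comment describes: | 
 |  |  * | 
 |  |  *	vhost_disable_notify(dev, vq); | 
 |  |  *	for (;;) { | 
 |  |  *		head = vhost_get_vq_desc(dev, vq, ...); | 
 |  |  *		if (head == vq->num) {		// ring drained | 
 |  |  *			if (unlikely(vhost_enable_notify(dev, vq))) { | 
 |  |  *				// a buffer slipped in: mask again, keep going | 
 |  |  *				vhost_disable_notify(dev, vq); | 
 |  |  *				continue; | 
 |  |  *			} | 
 |  |  *			break;			// wait for the guest to kick us | 
 |  |  *		} | 
 |  |  *		// ... consume the buffer, vhost_add_used_and_signal() ... | 
 |  |  *	} | 
 |  |  */ | 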
 | 1535 |  | 
 | 1536 | /* We don't need to be notified again. */ | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1537 | void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1538 | { | 
 | 1539 | 	int r; | 
| Krishna Kumar | d47effe | 2011-03-01 17:06:37 +0530 | [diff] [blame] | 1540 |  | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1541 | 	if (vq->used_flags & VRING_USED_F_NO_NOTIFY) | 
 | 1542 | 		return; | 
 | 1543 | 	vq->used_flags |= VRING_USED_F_NO_NOTIFY; | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1544 | 	if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { | 
| Jason Wang | 2723fea | 2011-06-21 18:04:38 +0800 | [diff] [blame] | 1545 | 		r = vhost_update_used_flags(vq); | 
| Michael S. Tsirkin | 8ea8cf8 | 2011-05-20 02:10:54 +0300 | [diff] [blame] | 1546 | 		if (r) | 
 | 1547 | 			vq_err(vq, "Failed to disable notification at %p: %d\n", | 
 | 1548 | 			       &vq->used->flags, r); | 
 | 1549 | 	} | 
| Michael S. Tsirkin | 3a4d5c9 | 2010-01-14 06:17:27 +0000 | [diff] [blame] | 1550 | } | 
| Michael S. Tsirkin | bab632d | 2011-07-18 03:48:46 +0000 | [diff] [blame] | 1551 |  | 
 | 1552 | static void vhost_zerocopy_done_signal(struct kref *kref) | 
 | 1553 | { | 
 | 1554 | 	struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref, | 
 | 1555 | 						    kref); | 
 | 1556 | 	wake_up(&ubufs->wait); | 
 | 1557 | } | 
 | 1558 |  | 
 | 1559 | struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq, | 
 | 1560 | 					bool zcopy) | 
 | 1561 | { | 
 | 1562 | 	struct vhost_ubuf_ref *ubufs; | 
 | 1563 | 	/* No zero copy backend? Nothing to count. */ | 
 | 1564 | 	if (!zcopy) | 
 | 1565 | 		return NULL; | 
 | 1566 | 	ubufs = kmalloc(sizeof *ubufs, GFP_KERNEL); | 
 | 1567 | 	if (!ubufs) | 
 | 1568 | 		return ERR_PTR(-ENOMEM); | 
 | 1569 | 	kref_init(&ubufs->kref); | 
| Michael S. Tsirkin | bab632d | 2011-07-18 03:48:46 +0000 | [diff] [blame] | 1570 | 	init_waitqueue_head(&ubufs->wait); | 
 | 1571 | 	ubufs->vq = vq; | 
 | 1572 | 	return ubufs; | 
 | 1573 | } | 
 | 1574 |  | 
 | 1575 | void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs) | 
 | 1576 | { | 
 | 1577 | 	kref_put(&ubufs->kref, vhost_zerocopy_done_signal); | 
 | 1578 | } | 
 | 1579 |  | 
 | 1580 | void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs) | 
 | 1581 | { | 
 | 1582 | 	kref_put(&ubufs->kref, vhost_zerocopy_done_signal); | 
 | 1583 | 	wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount)); | 
 | 1584 | 	kfree(ubufs); | 
 | 1585 | } |
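 |  |  | 
 |  | /* Lifecycle sketch for the zerocopy reference (hedged: the matching | 
 |  |  * kref_get() calls live in the network backend, not in this file).  The | 
 |  |  * backend allocates one vhost_ubuf_ref per virtqueue with | 
 |  |  * vhost_ubuf_alloc(), takes an extra reference for every zerocopy buffer it | 
 |  |  * puts in flight, and drops that reference from the completion path with | 
 |  |  * vhost_ubuf_put().  On teardown, vhost_ubuf_put_and_wait() drops the | 
 |  |  * initial reference and sleeps until the count reaches zero before freeing | 
 |  |  * the structure. */ | 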