/*
 * Freescale Hypervisor Management Driver
 *
 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc.
 * Author: Timur Tabi <timur@freescale.com>
 *
 * This file is licensed under the terms of the GNU General Public License
 * version 2.  This program is licensed "as is" without any warranty of any
 * kind, whether express or implied.
 *
 * The Freescale hypervisor management driver provides several services to
 * drivers and applications related to the Freescale hypervisor:
 *
 * 1. An ioctl interface for querying and managing partitions.
 *
 * 2. A file interface for reading incoming doorbells.
 *
 * 3. An interrupt handler for shutting down the partition upon receiving the
 *    shutdown doorbell from a manager partition.
 *
 * 4. A kernel interface for receiving callbacks when a managed partition
 *    shuts down.
 */
 | 24 |  | 
 | 25 | #include <linux/kernel.h> | 
 | 26 | #include <linux/module.h> | 
 | 27 | #include <linux/init.h> | 
 | 28 | #include <linux/types.h> | 
 | 29 | #include <linux/err.h> | 
 | 30 | #include <linux/fs.h> | 
 | 31 | #include <linux/miscdevice.h> | 
 | 32 | #include <linux/mm.h> | 
 | 33 | #include <linux/pagemap.h> | 
 | 34 | #include <linux/slab.h> | 
 | 35 | #include <linux/poll.h> | 
 | 36 | #include <linux/of.h> | 
 | 37 | #include <linux/reboot.h> | 
 | 38 | #include <linux/uaccess.h> | 
 | 39 | #include <linux/notifier.h> | 
| Timur Tabi | f1f4ee0 | 2011-07-19 15:45:51 -0500 | [diff] [blame] | 40 | #include <linux/interrupt.h> | 
| Timur Tabi | 6db7199 | 2011-06-09 15:52:06 -0500 | [diff] [blame] | 41 |  | 
 | 42 | #include <linux/io.h> | 
 | 43 | #include <asm/fsl_hcalls.h> | 
 | 44 |  | 
 | 45 | #include <linux/fsl_hypervisor.h> | 
 | 46 |  | 
 | 47 | static BLOCKING_NOTIFIER_HEAD(failover_subscribers); | 
 | 48 |  | 
 | 49 | /* | 
 | 50 |  * Ioctl interface for FSL_HV_IOCTL_PARTITION_RESTART | 
 | 51 |  * | 
 | 52 |  * Restart a running partition | 
 | 53 |  */ | 
 | 54 | static long ioctl_restart(struct fsl_hv_ioctl_restart __user *p) | 
 | 55 | { | 
 | 56 | 	struct fsl_hv_ioctl_restart param; | 
 | 57 |  | 
 | 58 | 	/* Get the parameters from the user */ | 
 | 59 | 	if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_restart))) | 
 | 60 | 		return -EFAULT; | 
 | 61 |  | 
 | 62 | 	param.ret = fh_partition_restart(param.partition); | 
 | 63 |  | 
 | 64 | 	if (copy_to_user(&p->ret, ¶m.ret, sizeof(__u32))) | 
 | 65 | 		return -EFAULT; | 
 | 66 |  | 
 | 67 | 	return 0; | 
 | 68 | } | 
 | 69 |  | 
 | 70 | /* | 
 | 71 |  * Ioctl interface for FSL_HV_IOCTL_PARTITION_STATUS | 
 | 72 |  * | 
 | 73 |  * Query the status of a partition | 
 | 74 |  */ | 
 | 75 | static long ioctl_status(struct fsl_hv_ioctl_status __user *p) | 
 | 76 | { | 
 | 77 | 	struct fsl_hv_ioctl_status param; | 
 | 78 | 	u32 status; | 
 | 79 |  | 
 | 80 | 	/* Get the parameters from the user */ | 
 | 81 | 	if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_status))) | 
 | 82 | 		return -EFAULT; | 
 | 83 |  | 
 | 84 | 	param.ret = fh_partition_get_status(param.partition, &status); | 
 | 85 | 	if (!param.ret) | 
 | 86 | 		param.status = status; | 
 | 87 |  | 
 | 88 | 	if (copy_to_user(p, ¶m, sizeof(struct fsl_hv_ioctl_status))) | 
 | 89 | 		return -EFAULT; | 
 | 90 |  | 
 | 91 | 	return 0; | 
 | 92 | } | 
 | 93 |  | 
 | 94 | /* | 
 | 95 |  * Ioctl interface for FSL_HV_IOCTL_PARTITION_START | 
 | 96 |  * | 
 | 97 |  * Start a stopped partition. | 
 | 98 |  */ | 
 | 99 | static long ioctl_start(struct fsl_hv_ioctl_start __user *p) | 
 | 100 | { | 
 | 101 | 	struct fsl_hv_ioctl_start param; | 
 | 102 |  | 
 | 103 | 	/* Get the parameters from the user */ | 
 | 104 | 	if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_start))) | 
 | 105 | 		return -EFAULT; | 
 | 106 |  | 
 | 107 | 	param.ret = fh_partition_start(param.partition, param.entry_point, | 
 | 108 | 				       param.load); | 
 | 109 |  | 
 | 110 | 	if (copy_to_user(&p->ret, ¶m.ret, sizeof(__u32))) | 
 | 111 | 		return -EFAULT; | 
 | 112 |  | 
 | 113 | 	return 0; | 
 | 114 | } | 
 | 115 |  | 
 | 116 | /* | 
 | 117 |  * Ioctl interface for FSL_HV_IOCTL_PARTITION_STOP | 
 | 118 |  * | 
 | 119 |  * Stop a running partition | 
 | 120 |  */ | 
 | 121 | static long ioctl_stop(struct fsl_hv_ioctl_stop __user *p) | 
 | 122 | { | 
 | 123 | 	struct fsl_hv_ioctl_stop param; | 
 | 124 |  | 
 | 125 | 	/* Get the parameters from the user */ | 
 | 126 | 	if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_stop))) | 
 | 127 | 		return -EFAULT; | 
 | 128 |  | 
 | 129 | 	param.ret = fh_partition_stop(param.partition); | 
 | 130 |  | 
 | 131 | 	if (copy_to_user(&p->ret, ¶m.ret, sizeof(__u32))) | 
 | 132 | 		return -EFAULT; | 
 | 133 |  | 
 | 134 | 	return 0; | 
 | 135 | } | 
 | 136 |  | 
 | 137 | /* | 
 | 138 |  * Ioctl interface for FSL_HV_IOCTL_MEMCPY | 
 | 139 |  * | 
 | 140 |  * The FH_MEMCPY hypercall takes an array of address/address/size structures | 
 | 141 |  * to represent the data being copied.  As a convenience to the user, this | 
 | 142 |  * ioctl takes a user-create buffer and a pointer to a guest physically | 
 | 143 |  * contiguous buffer in the remote partition, and creates the | 
 | 144 |  * address/address/size array for the hypercall. | 
 | 145 |  */ | 
 | 146 | static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p) | 
 | 147 | { | 
 | 148 | 	struct fsl_hv_ioctl_memcpy param; | 
 | 149 |  | 
 | 150 | 	struct page **pages = NULL; | 
 | 151 | 	void *sg_list_unaligned = NULL; | 
 | 152 | 	struct fh_sg_list *sg_list = NULL; | 
 | 153 |  | 
 | 154 | 	unsigned int num_pages; | 
 | 155 | 	unsigned long lb_offset; /* Offset within a page of the local buffer */ | 
 | 156 |  | 
 | 157 | 	unsigned int i; | 
 | 158 | 	long ret = 0; | 
 | 159 | 	int num_pinned; /* return value from get_user_pages() */ | 
 | 160 | 	phys_addr_t remote_paddr; /* The next address in the remote buffer */ | 
 | 161 | 	uint32_t count; /* The number of bytes left to copy */ | 
 | 162 |  | 
 | 163 | 	/* Get the parameters from the user */ | 
 | 164 | 	if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_memcpy))) | 
 | 165 | 		return -EFAULT; | 
 | 166 |  | 
 | 167 | 	/* | 
 | 168 | 	 * One partition must be local, the other must be remote.  In other | 
 | 169 | 	 * words, if source and target are both -1, or are both not -1, then | 
 | 170 | 	 * return an error. | 
 | 171 | 	 */ | 
 | 172 | 	if ((param.source == -1) == (param.target == -1)) | 
 | 173 | 		return -EINVAL; | 
 | 174 |  | 
 | 175 | 	/* | 
 | 176 | 	 * The array of pages returned by get_user_pages() covers only | 
 | 177 | 	 * page-aligned memory.  Since the user buffer is probably not | 
 | 178 | 	 * page-aligned, we need to handle the discrepancy. | 
 | 179 | 	 * | 
 | 180 | 	 * We calculate the offset within a page of the S/G list, and make | 
 | 181 | 	 * adjustments accordingly.  This will result in a page list that looks | 
 | 182 | 	 * like this: | 
 | 183 | 	 * | 
 | 184 | 	 *      ----    <-- first page starts before the buffer | 
 | 185 | 	 *     |    | | 
 | 186 | 	 *     |////|-> ---- | 
 | 187 | 	 *     |////|  |    | | 
 | 188 | 	 *      ----   |    | | 
 | 189 | 	 *             |    | | 
 | 190 | 	 *      ----   |    | | 
 | 191 | 	 *     |////|  |    | | 
 | 192 | 	 *     |////|  |    | | 
 | 193 | 	 *     |////|  |    | | 
 | 194 | 	 *      ----   |    | | 
 | 195 | 	 *             |    | | 
 | 196 | 	 *      ----   |    | | 
 | 197 | 	 *     |////|  |    | | 
 | 198 | 	 *     |////|  |    | | 
 | 199 | 	 *     |////|  |    | | 
 | 200 | 	 *      ----   |    | | 
 | 201 | 	 *             |    | | 
 | 202 | 	 *      ----   |    | | 
 | 203 | 	 *     |////|  |    | | 
 | 204 | 	 *     |////|-> ---- | 
 | 205 | 	 *     |    |   <-- last page ends after the buffer | 
 | 206 | 	 *      ---- | 
 | 207 | 	 * | 
 | 208 | 	 * The distance between the start of the first page and the start of the | 
 | 209 | 	 * buffer is lb_offset.  The hashed (///) areas are the parts of the | 
 | 210 | 	 * page list that contain the actual buffer. | 
 | 211 | 	 * | 
 | 212 | 	 * The advantage of this approach is that the number of pages is | 
 | 213 | 	 * equal to the number of entries in the S/G list that we give to the | 
 | 214 | 	 * hypervisor. | 
 | 215 | 	 */ | 
 | 216 | 	lb_offset = param.local_vaddr & (PAGE_SIZE - 1); | 
 | 217 | 	num_pages = (param.count + lb_offset + PAGE_SIZE - 1) >> PAGE_SHIFT; | 
 | 218 |  | 
 | 219 | 	/* Allocate the buffers we need */ | 
 | 220 |  | 
 | 221 | 	/* | 
 | 222 | 	 * 'pages' is an array of struct page pointers that's initialized by | 
 | 223 | 	 * get_user_pages(). | 
 | 224 | 	 */ | 
 | 225 | 	pages = kzalloc(num_pages * sizeof(struct page *), GFP_KERNEL); | 
 | 226 | 	if (!pages) { | 
 | 227 | 		pr_debug("fsl-hv: could not allocate page list\n"); | 
 | 228 | 		return -ENOMEM; | 
 | 229 | 	} | 
 | 230 |  | 
 | 231 | 	/* | 
 | 232 | 	 * sg_list is the list of fh_sg_list objects that we pass to the | 
 | 233 | 	 * hypervisor. | 
 | 234 | 	 */ | 
 | 235 | 	sg_list_unaligned = kmalloc(num_pages * sizeof(struct fh_sg_list) + | 
 | 236 | 		sizeof(struct fh_sg_list) - 1, GFP_KERNEL); | 
 | 237 | 	if (!sg_list_unaligned) { | 
 | 238 | 		pr_debug("fsl-hv: could not allocate S/G list\n"); | 
 | 239 | 		ret = -ENOMEM; | 
 | 240 | 		goto exit; | 
 | 241 | 	} | 
 | 242 | 	sg_list = PTR_ALIGN(sg_list_unaligned, sizeof(struct fh_sg_list)); | 
 | 243 |  | 
 | 244 | 	/* Get the physical addresses of the source buffer */ | 
 | 245 | 	down_read(¤t->mm->mmap_sem); | 
 | 246 | 	num_pinned = get_user_pages(current, current->mm, | 
 | 247 | 		param.local_vaddr - lb_offset, num_pages, | 
 | 248 | 		(param.source == -1) ? READ : WRITE, | 
 | 249 | 		0, pages, NULL); | 
 | 250 | 	up_read(¤t->mm->mmap_sem); | 
 | 251 |  | 
 | 252 | 	if (num_pinned != num_pages) { | 
 | 253 | 		/* get_user_pages() failed */ | 
 | 254 | 		pr_debug("fsl-hv: could not lock source buffer\n"); | 
 | 255 | 		ret = (num_pinned < 0) ? num_pinned : -EFAULT; | 
 | 256 | 		goto exit; | 
 | 257 | 	} | 
 | 258 |  | 
 | 259 | 	/* | 
 | 260 | 	 * Build the fh_sg_list[] array.  The first page is special | 
 | 261 | 	 * because it's misaligned. | 
 | 262 | 	 */ | 
 | 263 | 	if (param.source == -1) { | 
 | 264 | 		sg_list[0].source = page_to_phys(pages[0]) + lb_offset; | 
 | 265 | 		sg_list[0].target = param.remote_paddr; | 
 | 266 | 	} else { | 
 | 267 | 		sg_list[0].source = param.remote_paddr; | 
 | 268 | 		sg_list[0].target = page_to_phys(pages[0]) + lb_offset; | 
 | 269 | 	} | 
 | 270 | 	sg_list[0].size = min_t(uint64_t, param.count, PAGE_SIZE - lb_offset); | 
 | 271 |  | 
 | 272 | 	remote_paddr = param.remote_paddr + sg_list[0].size; | 
 | 273 | 	count = param.count - sg_list[0].size; | 
 | 274 |  | 
 | 275 | 	for (i = 1; i < num_pages; i++) { | 
 | 276 | 		if (param.source == -1) { | 
 | 277 | 			/* local to remote */ | 
 | 278 | 			sg_list[i].source = page_to_phys(pages[i]); | 
 | 279 | 			sg_list[i].target = remote_paddr; | 
 | 280 | 		} else { | 
 | 281 | 			/* remote to local */ | 
 | 282 | 			sg_list[i].source = remote_paddr; | 
 | 283 | 			sg_list[i].target = page_to_phys(pages[i]); | 
 | 284 | 		} | 
 | 285 | 		sg_list[i].size = min_t(uint64_t, count, PAGE_SIZE); | 
 | 286 |  | 
 | 287 | 		remote_paddr += sg_list[i].size; | 
 | 288 | 		count -= sg_list[i].size; | 
 | 289 | 	} | 
 | 290 |  | 
 | 291 | 	param.ret = fh_partition_memcpy(param.source, param.target, | 
 | 292 | 		virt_to_phys(sg_list), num_pages); | 
 | 293 |  | 
 | 294 | exit: | 
 | 295 | 	if (pages) { | 
 | 296 | 		for (i = 0; i < num_pages; i++) | 
 | 297 | 			if (pages[i]) | 
 | 298 | 				put_page(pages[i]); | 
 | 299 | 	} | 
 | 300 |  | 
 | 301 | 	kfree(sg_list_unaligned); | 
 | 302 | 	kfree(pages); | 
 | 303 |  | 
 | 304 | 	if (!ret) | 
 | 305 | 		if (copy_to_user(&p->ret, ¶m.ret, sizeof(__u32))) | 
 | 306 | 			return -EFAULT; | 
 | 307 |  | 
 | 308 | 	return ret; | 
 | 309 | } | 
 | 310 |  | 
 | 311 | /* | 
 | 312 |  * Ioctl interface for FSL_HV_IOCTL_DOORBELL | 
 | 313 |  * | 
 | 314 |  * Ring a doorbell | 
 | 315 |  */ | 
 | 316 | static long ioctl_doorbell(struct fsl_hv_ioctl_doorbell __user *p) | 
 | 317 | { | 
 | 318 | 	struct fsl_hv_ioctl_doorbell param; | 
 | 319 |  | 
 | 320 | 	/* Get the parameters from the user. */ | 
 | 321 | 	if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_doorbell))) | 
 | 322 | 		return -EFAULT; | 
 | 323 |  | 
 | 324 | 	param.ret = ev_doorbell_send(param.doorbell); | 
 | 325 |  | 
 | 326 | 	if (copy_to_user(&p->ret, ¶m.ret, sizeof(__u32))) | 
 | 327 | 		return -EFAULT; | 
 | 328 |  | 
 | 329 | 	return 0; | 
 | 330 | } | 
 | 331 |  | 
 | 332 | static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user *p, int set) | 
 | 333 | { | 
 | 334 | 	struct fsl_hv_ioctl_prop param; | 
 | 335 | 	char __user *upath, *upropname; | 
 | 336 | 	void __user *upropval; | 
 | 337 | 	char *path = NULL, *propname = NULL; | 
 | 338 | 	void *propval = NULL; | 
 | 339 | 	int ret = 0; | 
 | 340 |  | 
 | 341 | 	/* Get the parameters from the user. */ | 
 | 342 | 	if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_prop))) | 
 | 343 | 		return -EFAULT; | 
 | 344 |  | 
 | 345 | 	upath = (char __user *)(uintptr_t)param.path; | 
 | 346 | 	upropname = (char __user *)(uintptr_t)param.propname; | 
 | 347 | 	upropval = (void __user *)(uintptr_t)param.propval; | 
 | 348 |  | 
 | 349 | 	path = strndup_user(upath, FH_DTPROP_MAX_PATHLEN); | 
 | 350 | 	if (IS_ERR(path)) { | 
 | 351 | 		ret = PTR_ERR(path); | 
 | 352 | 		goto out; | 
 | 353 | 	} | 
 | 354 |  | 
 | 355 | 	propname = strndup_user(upropname, FH_DTPROP_MAX_PATHLEN); | 
 | 356 | 	if (IS_ERR(propname)) { | 
 | 357 | 		ret = PTR_ERR(propname); | 
 | 358 | 		goto out; | 
 | 359 | 	} | 
 | 360 |  | 
 | 361 | 	if (param.proplen > FH_DTPROP_MAX_PROPLEN) { | 
 | 362 | 		ret = -EINVAL; | 
 | 363 | 		goto out; | 
 | 364 | 	} | 
 | 365 |  | 
 | 366 | 	propval = kmalloc(param.proplen, GFP_KERNEL); | 
 | 367 | 	if (!propval) { | 
 | 368 | 		ret = -ENOMEM; | 
 | 369 | 		goto out; | 
 | 370 | 	} | 
 | 371 |  | 
 | 372 | 	if (set) { | 
 | 373 | 		if (copy_from_user(propval, upropval, param.proplen)) { | 
 | 374 | 			ret = -EFAULT; | 
 | 375 | 			goto out; | 
 | 376 | 		} | 
 | 377 |  | 
 | 378 | 		param.ret = fh_partition_set_dtprop(param.handle, | 
 | 379 | 						    virt_to_phys(path), | 
 | 380 | 						    virt_to_phys(propname), | 
 | 381 | 						    virt_to_phys(propval), | 
 | 382 | 						    param.proplen); | 
 | 383 | 	} else { | 
 | 384 | 		param.ret = fh_partition_get_dtprop(param.handle, | 
 | 385 | 						    virt_to_phys(path), | 
 | 386 | 						    virt_to_phys(propname), | 
 | 387 | 						    virt_to_phys(propval), | 
 | 388 | 						    ¶m.proplen); | 
 | 389 |  | 
 | 390 | 		if (param.ret == 0) { | 
 | 391 | 			if (copy_to_user(upropval, propval, param.proplen) || | 
 | 392 | 			    put_user(param.proplen, &p->proplen)) { | 
 | 393 | 				ret = -EFAULT; | 
 | 394 | 				goto out; | 
 | 395 | 			} | 
 | 396 | 		} | 
 | 397 | 	} | 
 | 398 |  | 
 | 399 | 	if (put_user(param.ret, &p->ret)) | 
 | 400 | 		ret = -EFAULT; | 
 | 401 |  | 
 | 402 | out: | 
 | 403 | 	kfree(path); | 
 | 404 | 	kfree(propval); | 
 | 405 | 	kfree(propname); | 
 | 406 |  | 
 | 407 | 	return ret; | 
 | 408 | } | 
 | 409 |  | 
 | 410 | /* | 
 | 411 |  * Ioctl main entry point | 
 | 412 |  */ | 
 | 413 | static long fsl_hv_ioctl(struct file *file, unsigned int cmd, | 
 | 414 | 			 unsigned long argaddr) | 
 | 415 | { | 
 | 416 | 	void __user *arg = (void __user *)argaddr; | 
 | 417 | 	long ret; | 
 | 418 |  | 
 | 419 | 	switch (cmd) { | 
 | 420 | 	case FSL_HV_IOCTL_PARTITION_RESTART: | 
 | 421 | 		ret = ioctl_restart(arg); | 
 | 422 | 		break; | 
 | 423 | 	case FSL_HV_IOCTL_PARTITION_GET_STATUS: | 
 | 424 | 		ret = ioctl_status(arg); | 
 | 425 | 		break; | 
 | 426 | 	case FSL_HV_IOCTL_PARTITION_START: | 
 | 427 | 		ret = ioctl_start(arg); | 
 | 428 | 		break; | 
 | 429 | 	case FSL_HV_IOCTL_PARTITION_STOP: | 
 | 430 | 		ret = ioctl_stop(arg); | 
 | 431 | 		break; | 
 | 432 | 	case FSL_HV_IOCTL_MEMCPY: | 
 | 433 | 		ret = ioctl_memcpy(arg); | 
 | 434 | 		break; | 
 | 435 | 	case FSL_HV_IOCTL_DOORBELL: | 
 | 436 | 		ret = ioctl_doorbell(arg); | 
 | 437 | 		break; | 
 | 438 | 	case FSL_HV_IOCTL_GETPROP: | 
 | 439 | 		ret = ioctl_dtprop(arg, 0); | 
 | 440 | 		break; | 
 | 441 | 	case FSL_HV_IOCTL_SETPROP: | 
 | 442 | 		ret = ioctl_dtprop(arg, 1); | 
 | 443 | 		break; | 
 | 444 | 	default: | 
 | 445 | 		pr_debug("fsl-hv: bad ioctl dir=%u type=%u cmd=%u size=%u\n", | 
 | 446 | 			 _IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), | 
 | 447 | 			 _IOC_SIZE(cmd)); | 
 | 448 | 		return -ENOTTY; | 
 | 449 | 	} | 
 | 450 |  | 
 | 451 | 	return ret; | 
 | 452 | } | 
 | 453 |  | 
 | 454 | /* Linked list of processes that have us open */ | 
 | 455 | static struct list_head db_list; | 
 | 456 |  | 
 | 457 | /* spinlock for db_list */ | 
 | 458 | static DEFINE_SPINLOCK(db_list_lock); | 
 | 459 |  | 
 | 460 | /* The size of the doorbell event queue.  This must be a power of two. */ | 
 | 461 | #define QSIZE	16 | 
 | 462 |  | 
 | 463 | /* Returns the next head/tail pointer, wrapping around the queue if necessary */ | 
 | 464 | #define nextp(x) (((x) + 1) & (QSIZE - 1)) | 
 | 465 |  | 
 | 466 | /* Per-open data structure */ | 
 | 467 | struct doorbell_queue { | 
 | 468 | 	struct list_head list; | 
 | 469 | 	spinlock_t lock; | 
 | 470 | 	wait_queue_head_t wait; | 
 | 471 | 	unsigned int head; | 
 | 472 | 	unsigned int tail; | 
 | 473 | 	uint32_t q[QSIZE]; | 
 | 474 | }; | 
 | 475 |  | 
 | 476 | /* Linked list of ISRs that we registered */ | 
 | 477 | struct list_head isr_list; | 
 | 478 |  | 
 | 479 | /* Per-ISR data structure */ | 
 | 480 | struct doorbell_isr { | 
 | 481 | 	struct list_head list; | 
 | 482 | 	unsigned int irq; | 
 | 483 | 	uint32_t doorbell;	/* The doorbell handle */ | 
 | 484 | 	uint32_t partition;	/* The partition handle, if used */ | 
 | 485 | }; | 
 | 486 |  | 
 | 487 | /* | 
 | 488 |  * Add a doorbell to all of the doorbell queues | 
 | 489 |  */ | 
 | 490 | static void fsl_hv_queue_doorbell(uint32_t doorbell) | 
 | 491 | { | 
 | 492 | 	struct doorbell_queue *dbq; | 
 | 493 | 	unsigned long flags; | 
 | 494 |  | 
 | 495 | 	/* Prevent another core from modifying db_list */ | 
 | 496 | 	spin_lock_irqsave(&db_list_lock, flags); | 
 | 497 |  | 
 | 498 | 	list_for_each_entry(dbq, &db_list, list) { | 
 | 499 | 		if (dbq->head != nextp(dbq->tail)) { | 
 | 500 | 			dbq->q[dbq->tail] = doorbell; | 
 | 501 | 			/* | 
 | 502 | 			 * This memory barrier eliminates the need to grab | 
 | 503 | 			 * the spinlock for dbq. | 
 | 504 | 			 */ | 
 | 505 | 			smp_wmb(); | 
 | 506 | 			dbq->tail = nextp(dbq->tail); | 
 | 507 | 			wake_up_interruptible(&dbq->wait); | 
 | 508 | 		} | 
 | 509 | 	} | 
 | 510 |  | 
 | 511 | 	spin_unlock_irqrestore(&db_list_lock, flags); | 
 | 512 | } | 
 | 513 |  | 
 | 514 | /* | 
 | 515 |  * Interrupt handler for all doorbells | 
 | 516 |  * | 
 | 517 |  * We use the same interrupt handler for all doorbells.  Whenever a doorbell | 
 | 518 |  * is rung, and we receive an interrupt, we just put the handle for that | 
 | 519 |  * doorbell (passed to us as *data) into all of the queues. | 
 | 520 |  */ | 
 | 521 | static irqreturn_t fsl_hv_isr(int irq, void *data) | 
 | 522 | { | 
 | 523 | 	fsl_hv_queue_doorbell((uintptr_t) data); | 
 | 524 |  | 
 | 525 | 	return IRQ_HANDLED; | 
 | 526 | } | 
 | 527 |  | 
 | 528 | /* | 
 | 529 |  * State change thread function | 
 | 530 |  * | 
 | 531 |  * The state change notification arrives in an interrupt, but we can't call | 
 | 532 |  * blocking_notifier_call_chain() in an interrupt handler.  We could call | 
 | 533 |  * atomic_notifier_call_chain(), but that would require the clients' call-back | 
 | 534 |  * function to run in interrupt context.  Since we don't want to impose that | 
 | 535 |  * restriction on the clients, we use a threaded IRQ to process the | 
 | 536 |  * notification in kernel context. | 
 | 537 |  */ | 
 | 538 | static irqreturn_t fsl_hv_state_change_thread(int irq, void *data) | 
 | 539 | { | 
 | 540 | 	struct doorbell_isr *dbisr = data; | 
 | 541 |  | 
 | 542 | 	blocking_notifier_call_chain(&failover_subscribers, dbisr->partition, | 
 | 543 | 				     NULL); | 
 | 544 |  | 
 | 545 | 	return IRQ_HANDLED; | 
 | 546 | } | 
 | 547 |  | 
 | 548 | /* | 
 | 549 |  * Interrupt handler for state-change doorbells | 
 | 550 |  */ | 
 | 551 | static irqreturn_t fsl_hv_state_change_isr(int irq, void *data) | 
 | 552 | { | 
 | 553 | 	unsigned int status; | 
 | 554 | 	struct doorbell_isr *dbisr = data; | 
 | 555 | 	int ret; | 
 | 556 |  | 
 | 557 | 	/* It's still a doorbell, so add it to all the queues. */ | 
 | 558 | 	fsl_hv_queue_doorbell(dbisr->doorbell); | 
 | 559 |  | 
 | 560 | 	/* Determine the new state, and if it's stopped, notify the clients. */ | 
 | 561 | 	ret = fh_partition_get_status(dbisr->partition, &status); | 
 | 562 | 	if (!ret && (status == FH_PARTITION_STOPPED)) | 
 | 563 | 		return IRQ_WAKE_THREAD; | 
 | 564 |  | 
 | 565 | 	return IRQ_HANDLED; | 
 | 566 | } | 
 | 567 |  | 
 | 568 | /* | 
 | 569 |  * Returns a bitmask indicating whether a read will block | 
 | 570 |  */ | 
 | 571 | static unsigned int fsl_hv_poll(struct file *filp, struct poll_table_struct *p) | 
 | 572 | { | 
 | 573 | 	struct doorbell_queue *dbq = filp->private_data; | 
 | 574 | 	unsigned long flags; | 
 | 575 | 	unsigned int mask; | 
 | 576 |  | 
 | 577 | 	spin_lock_irqsave(&dbq->lock, flags); | 
 | 578 |  | 
 | 579 | 	poll_wait(filp, &dbq->wait, p); | 
 | 580 | 	mask = (dbq->head == dbq->tail) ? 0 : (POLLIN | POLLRDNORM); | 
 | 581 |  | 
 | 582 | 	spin_unlock_irqrestore(&dbq->lock, flags); | 
 | 583 |  | 
 | 584 | 	return mask; | 
 | 585 | } | 
 | 586 |  | 
 | 587 | /* | 
 | 588 |  * Return the handles for any incoming doorbells | 
 | 589 |  * | 
 | 590 |  * If there are doorbell handles in the queue for this open instance, then | 
 | 591 |  * return them to the caller as an array of 32-bit integers.  Otherwise, | 
 | 592 |  * block until there is at least one handle to return. | 
 | 593 |  */ | 
 | 594 | static ssize_t fsl_hv_read(struct file *filp, char __user *buf, size_t len, | 
 | 595 | 			   loff_t *off) | 
 | 596 | { | 
 | 597 | 	struct doorbell_queue *dbq = filp->private_data; | 
 | 598 | 	uint32_t __user *p = (uint32_t __user *) buf; /* for put_user() */ | 
 | 599 | 	unsigned long flags; | 
 | 600 | 	ssize_t count = 0; | 
 | 601 |  | 
 | 602 | 	/* Make sure we stop when the user buffer is full. */ | 
 | 603 | 	while (len >= sizeof(uint32_t)) { | 
 | 604 | 		uint32_t dbell;	/* Local copy of doorbell queue data */ | 
 | 605 |  | 
 | 606 | 		spin_lock_irqsave(&dbq->lock, flags); | 
 | 607 |  | 
 | 608 | 		/* | 
 | 609 | 		 * If the queue is empty, then either we're done or we need | 
 | 610 | 		 * to block.  If the application specified O_NONBLOCK, then | 
 | 611 | 		 * we return the appropriate error code. | 
 | 612 | 		 */ | 
 | 613 | 		if (dbq->head == dbq->tail) { | 
 | 614 | 			spin_unlock_irqrestore(&dbq->lock, flags); | 
 | 615 | 			if (count) | 
 | 616 | 				break; | 
 | 617 | 			if (filp->f_flags & O_NONBLOCK) | 
 | 618 | 				return -EAGAIN; | 
 | 619 | 			if (wait_event_interruptible(dbq->wait, | 
 | 620 | 						     dbq->head != dbq->tail)) | 
 | 621 | 				return -ERESTARTSYS; | 
 | 622 | 			continue; | 
 | 623 | 		} | 
 | 624 |  | 
 | 625 | 		/* | 
 | 626 | 		 * Even though we have an smp_wmb() in the ISR, the core | 
 | 627 | 		 * might speculatively execute the "dbell = ..." below while | 
 | 628 | 		 * it's evaluating the if-statement above.  In that case, the | 
 | 629 | 		 * value put into dbell could be stale if the core accepts the | 
 | 630 | 		 * speculation. To prevent that, we need a read memory barrier | 
 | 631 | 		 * here as well. | 
 | 632 | 		 */ | 
 | 633 | 		smp_rmb(); | 
 | 634 |  | 
 | 635 | 		/* Copy the data to a temporary local buffer, because | 
 | 636 | 		 * we can't call copy_to_user() from inside a spinlock | 
 | 637 | 		 */ | 
 | 638 | 		dbell = dbq->q[dbq->head]; | 
 | 639 | 		dbq->head = nextp(dbq->head); | 
 | 640 |  | 
 | 641 | 		spin_unlock_irqrestore(&dbq->lock, flags); | 
 | 642 |  | 
 | 643 | 		if (put_user(dbell, p)) | 
 | 644 | 			return -EFAULT; | 
 | 645 | 		p++; | 
 | 646 | 		count += sizeof(uint32_t); | 
 | 647 | 		len -= sizeof(uint32_t); | 
 | 648 | 	} | 
 | 649 |  | 
 | 650 | 	return count; | 
 | 651 | } | 
 | 652 |  | 
 | 653 | /* | 
 | 654 |  * Open the driver and prepare for reading doorbells. | 
 | 655 |  * | 
 | 656 |  * Every time an application opens the driver, we create a doorbell queue | 
 | 657 |  * for that file handle.  This queue is used for any incoming doorbells. | 
 | 658 |  */ | 
 | 659 | static int fsl_hv_open(struct inode *inode, struct file *filp) | 
 | 660 | { | 
 | 661 | 	struct doorbell_queue *dbq; | 
 | 662 | 	unsigned long flags; | 
 | 663 | 	int ret = 0; | 
 | 664 |  | 
 | 665 | 	dbq = kzalloc(sizeof(struct doorbell_queue), GFP_KERNEL); | 
 | 666 | 	if (!dbq) { | 
 | 667 | 		pr_err("fsl-hv: out of memory\n"); | 
 | 668 | 		return -ENOMEM; | 
 | 669 | 	} | 
 | 670 |  | 
 | 671 | 	spin_lock_init(&dbq->lock); | 
 | 672 | 	init_waitqueue_head(&dbq->wait); | 
 | 673 |  | 
 | 674 | 	spin_lock_irqsave(&db_list_lock, flags); | 
 | 675 | 	list_add(&dbq->list, &db_list); | 
 | 676 | 	spin_unlock_irqrestore(&db_list_lock, flags); | 
 | 677 |  | 
 | 678 | 	filp->private_data = dbq; | 
 | 679 |  | 
 | 680 | 	return ret; | 
 | 681 | } | 
 | 682 |  | 
 | 683 | /* | 
 | 684 |  * Close the driver | 
 | 685 |  */ | 
 | 686 | static int fsl_hv_close(struct inode *inode, struct file *filp) | 
 | 687 | { | 
 | 688 | 	struct doorbell_queue *dbq = filp->private_data; | 
 | 689 | 	unsigned long flags; | 
 | 690 |  | 
 | 691 | 	int ret = 0; | 
 | 692 |  | 
 | 693 | 	spin_lock_irqsave(&db_list_lock, flags); | 
 | 694 | 	list_del(&dbq->list); | 
 | 695 | 	spin_unlock_irqrestore(&db_list_lock, flags); | 
 | 696 |  | 
 | 697 | 	kfree(dbq); | 
 | 698 |  | 
 | 699 | 	return ret; | 
 | 700 | } | 
 | 701 |  | 
 | 702 | static const struct file_operations fsl_hv_fops = { | 
 | 703 | 	.owner = THIS_MODULE, | 
 | 704 | 	.open = fsl_hv_open, | 
 | 705 | 	.release = fsl_hv_close, | 
 | 706 | 	.poll = fsl_hv_poll, | 
 | 707 | 	.read = fsl_hv_read, | 
 | 708 | 	.unlocked_ioctl = fsl_hv_ioctl, | 
| Mihai Caraman | c031ab1 | 2011-10-13 18:05:21 +0300 | [diff] [blame] | 709 | 	.compat_ioctl = fsl_hv_ioctl, | 
| Timur Tabi | 6db7199 | 2011-06-09 15:52:06 -0500 | [diff] [blame] | 710 | }; | 
 | 711 |  | 
 | 712 | static struct miscdevice fsl_hv_misc_dev = { | 
 | 713 | 	MISC_DYNAMIC_MINOR, | 
 | 714 | 	"fsl-hv", | 
 | 715 | 	&fsl_hv_fops | 
 | 716 | }; | 
 | 717 |  | 
 | 718 | static irqreturn_t fsl_hv_shutdown_isr(int irq, void *data) | 
 | 719 | { | 
 | 720 | 	orderly_poweroff(false); | 
 | 721 |  | 
 | 722 | 	return IRQ_HANDLED; | 
 | 723 | } | 
 | 724 |  | 
 | 725 | /* | 
 | 726 |  * Returns the handle of the parent of the given node | 
 | 727 |  * | 
 | 728 |  * The handle is the value of the 'hv-handle' property | 
 | 729 |  */ | 
 | 730 | static int get_parent_handle(struct device_node *np) | 
 | 731 | { | 
 | 732 | 	struct device_node *parent; | 
 | 733 | 	const uint32_t *prop; | 
 | 734 | 	uint32_t handle; | 
 | 735 | 	int len; | 
 | 736 |  | 
 | 737 | 	parent = of_get_parent(np); | 
 | 738 | 	if (!parent) | 
 | 739 | 		/* It's not really possible for this to fail */ | 
 | 740 | 		return -ENODEV; | 
 | 741 |  | 
 | 742 | 	/* | 
 | 743 | 	 * The proper name for the handle property is "hv-handle", but some | 
 | 744 | 	 * older versions of the hypervisor used "reg". | 
 | 745 | 	 */ | 
 | 746 | 	prop = of_get_property(parent, "hv-handle", &len); | 
 | 747 | 	if (!prop) | 
 | 748 | 		prop = of_get_property(parent, "reg", &len); | 
 | 749 |  | 
 | 750 | 	if (!prop || (len != sizeof(uint32_t))) { | 
 | 751 | 		/* This can happen only if the node is malformed */ | 
 | 752 | 		of_node_put(parent); | 
 | 753 | 		return -ENODEV; | 
 | 754 | 	} | 
 | 755 |  | 
 | 756 | 	handle = be32_to_cpup(prop); | 
 | 757 | 	of_node_put(parent); | 
 | 758 |  | 
 | 759 | 	return handle; | 
 | 760 | } | 
 | 761 |  | 
 | 762 | /* | 
 | 763 |  * Register a callback for failover events | 
 | 764 |  * | 
 | 765 |  * This function is called by device drivers to register their callback | 
 | 766 |  * functions for fail-over events. | 
 | 767 |  */ | 
 | 768 | int fsl_hv_failover_register(struct notifier_block *nb) | 
 | 769 | { | 
 | 770 | 	return blocking_notifier_chain_register(&failover_subscribers, nb); | 
 | 771 | } | 
 | 772 | EXPORT_SYMBOL(fsl_hv_failover_register); | 
 | 773 |  | 
 | 774 | /* | 
 | 775 |  * Unregister a callback for failover events | 
 | 776 |  */ | 
 | 777 | int fsl_hv_failover_unregister(struct notifier_block *nb) | 
 | 778 | { | 
 | 779 | 	return blocking_notifier_chain_unregister(&failover_subscribers, nb); | 
 | 780 | } | 
 | 781 | EXPORT_SYMBOL(fsl_hv_failover_unregister); | 
 | 782 |  | 
 | 783 | /* | 
 | 784 |  * Return TRUE if we're running under FSL hypervisor | 
 | 785 |  * | 
 | 786 |  * This function checks to see if we're running under the Freescale | 
 | 787 |  * hypervisor, and returns zero if we're not, or non-zero if we are. | 
 | 788 |  * | 
 | 789 |  * First, it checks if MSR[GS]==1, which means we're running under some | 
 | 790 |  * hypervisor.  Then it checks if there is a hypervisor node in the device | 
 | 791 |  * tree.  Currently, that means there needs to be a node in the root called | 
 | 792 |  * "hypervisor" and which has a property named "fsl,hv-version". | 
 | 793 |  */ | 
 | 794 | static int has_fsl_hypervisor(void) | 
 | 795 | { | 
 | 796 | 	struct device_node *node; | 
 | 797 | 	int ret; | 
 | 798 |  | 
 | 799 | 	if (!(mfmsr() & MSR_GS)) | 
 | 800 | 		return 0; | 
 | 801 |  | 
 | 802 | 	node = of_find_node_by_path("/hypervisor"); | 
 | 803 | 	if (!node) | 
 | 804 | 		return 0; | 
 | 805 |  | 
 | 806 | 	ret = of_find_property(node, "fsl,hv-version", NULL) != NULL; | 
 | 807 |  | 
 | 808 | 	of_node_put(node); | 
 | 809 |  | 
 | 810 | 	return ret; | 
 | 811 | } | 
 | 812 |  | 
 | 813 | /* | 
 | 814 |  * Freescale hypervisor management driver init | 
 | 815 |  * | 
 | 816 |  * This function is called when this module is loaded. | 
 | 817 |  * | 
 | 818 |  * Register ourselves as a miscellaneous driver.  This will register the | 
 | 819 |  * fops structure and create the right sysfs entries for udev. | 
 | 820 |  */ | 
 | 821 | static int __init fsl_hypervisor_init(void) | 
 | 822 | { | 
 | 823 | 	struct device_node *np; | 
 | 824 | 	struct doorbell_isr *dbisr, *n; | 
 | 825 | 	int ret; | 
 | 826 |  | 
 | 827 | 	pr_info("Freescale hypervisor management driver\n"); | 
 | 828 |  | 
 | 829 | 	if (!has_fsl_hypervisor()) { | 
 | 830 | 		pr_info("fsl-hv: no hypervisor found\n"); | 
 | 831 | 		return -ENODEV; | 
 | 832 | 	} | 
 | 833 |  | 
 | 834 | 	ret = misc_register(&fsl_hv_misc_dev); | 
 | 835 | 	if (ret) { | 
 | 836 | 		pr_err("fsl-hv: cannot register device\n"); | 
 | 837 | 		return ret; | 
 | 838 | 	} | 
 | 839 |  | 
 | 840 | 	INIT_LIST_HEAD(&db_list); | 
 | 841 | 	INIT_LIST_HEAD(&isr_list); | 
 | 842 |  | 
 | 843 | 	for_each_compatible_node(np, NULL, "epapr,hv-receive-doorbell") { | 
 | 844 | 		unsigned int irq; | 
 | 845 | 		const uint32_t *handle; | 
 | 846 |  | 
 | 847 | 		handle = of_get_property(np, "interrupts", NULL); | 
 | 848 | 		irq = irq_of_parse_and_map(np, 0); | 
 | 849 | 		if (!handle || (irq == NO_IRQ)) { | 
 | 850 | 			pr_err("fsl-hv: no 'interrupts' property in %s node\n", | 
 | 851 | 				np->full_name); | 
 | 852 | 			continue; | 
 | 853 | 		} | 
 | 854 |  | 
 | 855 | 		dbisr = kzalloc(sizeof(*dbisr), GFP_KERNEL); | 
 | 856 | 		if (!dbisr) | 
 | 857 | 			goto out_of_memory; | 
 | 858 |  | 
 | 859 | 		dbisr->irq = irq; | 
 | 860 | 		dbisr->doorbell = be32_to_cpup(handle); | 
 | 861 |  | 
 | 862 | 		if (of_device_is_compatible(np, "fsl,hv-shutdown-doorbell")) { | 
 | 863 | 			/* The shutdown doorbell gets its own ISR */ | 
 | 864 | 			ret = request_irq(irq, fsl_hv_shutdown_isr, 0, | 
 | 865 | 					  np->name, NULL); | 
 | 866 | 		} else if (of_device_is_compatible(np, | 
 | 867 | 			"fsl,hv-state-change-doorbell")) { | 
 | 868 | 			/* | 
 | 869 | 			 * The state change doorbell triggers a notification if | 
 | 870 | 			 * the state of the managed partition changes to | 
 | 871 | 			 * "stopped". We need a separate interrupt handler for | 
 | 872 | 			 * that, and we also need to know the handle of the | 
 | 873 | 			 * target partition, not just the handle of the | 
 | 874 | 			 * doorbell. | 
 | 875 | 			 */ | 
 | 876 | 			dbisr->partition = ret = get_parent_handle(np); | 
 | 877 | 			if (ret < 0) { | 
 | 878 | 				pr_err("fsl-hv: node %s has missing or " | 
 | 879 | 				       "malformed parent\n", np->full_name); | 
 | 880 | 				kfree(dbisr); | 
 | 881 | 				continue; | 
 | 882 | 			} | 
 | 883 | 			ret = request_threaded_irq(irq, fsl_hv_state_change_isr, | 
 | 884 | 						   fsl_hv_state_change_thread, | 
 | 885 | 						   0, np->name, dbisr); | 
 | 886 | 		} else | 
 | 887 | 			ret = request_irq(irq, fsl_hv_isr, 0, np->name, dbisr); | 
 | 888 |  | 
 | 889 | 		if (ret < 0) { | 
 | 890 | 			pr_err("fsl-hv: could not request irq %u for node %s\n", | 
 | 891 | 			       irq, np->full_name); | 
 | 892 | 			kfree(dbisr); | 
 | 893 | 			continue; | 
 | 894 | 		} | 
 | 895 |  | 
 | 896 | 		list_add(&dbisr->list, &isr_list); | 
 | 897 |  | 
 | 898 | 		pr_info("fsl-hv: registered handler for doorbell %u\n", | 
 | 899 | 			dbisr->doorbell); | 
 | 900 | 	} | 
 | 901 |  | 
 | 902 | 	return 0; | 
 | 903 |  | 
 | 904 | out_of_memory: | 
 | 905 | 	list_for_each_entry_safe(dbisr, n, &isr_list, list) { | 
 | 906 | 		free_irq(dbisr->irq, dbisr); | 
 | 907 | 		list_del(&dbisr->list); | 
 | 908 | 		kfree(dbisr); | 
 | 909 | 	} | 
 | 910 |  | 
 | 911 | 	misc_deregister(&fsl_hv_misc_dev); | 
 | 912 |  | 
 | 913 | 	return -ENOMEM; | 
 | 914 | } | 
 | 915 |  | 
 | 916 | /* | 
 | 917 |  * Freescale hypervisor management driver termination | 
 | 918 |  * | 
 | 919 |  * This function is called when this driver is unloaded. | 
 | 920 |  */ | 
 | 921 | static void __exit fsl_hypervisor_exit(void) | 
 | 922 | { | 
 | 923 | 	struct doorbell_isr *dbisr, *n; | 
 | 924 |  | 
 | 925 | 	list_for_each_entry_safe(dbisr, n, &isr_list, list) { | 
 | 926 | 		free_irq(dbisr->irq, dbisr); | 
 | 927 | 		list_del(&dbisr->list); | 
 | 928 | 		kfree(dbisr); | 
 | 929 | 	} | 
 | 930 |  | 
 | 931 | 	misc_deregister(&fsl_hv_misc_dev); | 
 | 932 | } | 
 | 933 |  | 
 | 934 | module_init(fsl_hypervisor_init); | 
 | 935 | module_exit(fsl_hypervisor_exit); | 
 | 936 |  | 
 | 937 | MODULE_AUTHOR("Timur Tabi <timur@freescale.com>"); | 
 | 938 | MODULE_DESCRIPTION("Freescale hypervisor management driver"); | 
 | 939 | MODULE_LICENSE("GPL v2"); |