| Andy Grover | a8c879a | 2009-02-24 15:30:22 +0000 | [diff] [blame] | 1 | /* | 
 | 2 |  * Copyright (c) 2006 Oracle.  All rights reserved. | 
 | 3 |  * | 
 | 4 |  * This software is available to you under a choice of one of two | 
 | 5 |  * licenses.  You may choose to be licensed under the terms of the GNU | 
 | 6 |  * General Public License (GPL) Version 2, available from the file | 
 | 7 |  * COPYING in the main directory of this source tree, or the | 
 | 8 |  * OpenIB.org BSD license below: | 
 | 9 |  * | 
 | 10 |  *     Redistribution and use in source and binary forms, with or | 
 | 11 |  *     without modification, are permitted provided that the following | 
 | 12 |  *     conditions are met: | 
 | 13 |  * | 
 | 14 |  *      - Redistributions of source code must retain the above | 
 | 15 |  *        copyright notice, this list of conditions and the following | 
 | 16 |  *        disclaimer. | 
 | 17 |  * | 
 | 18 |  *      - Redistributions in binary form must reproduce the above | 
 | 19 |  *        copyright notice, this list of conditions and the following | 
 | 20 |  *        disclaimer in the documentation and/or other materials | 
 | 21 |  *        provided with the distribution. | 
 | 22 |  * | 
 | 23 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | 
 | 24 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | 
 | 25 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | 
 | 26 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | 
 | 27 |  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | 
 | 28 |  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | 
 | 29 |  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | 
 | 30 |  * SOFTWARE. | 
 | 31 |  * | 
 | 32 |  */ | 
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>

#include "rds.h"
 | 38 |  | 
 | 39 | /* | 
 | 40 |  * This file implements a getsockopt() call which copies a set of fixed | 
 | 41 |  * sized structs into a user-specified buffer as a means of providing | 
 | 42 |  * read-only information about RDS. | 
 | 43 |  * | 
 | 44 |  * For a given information source there are a given number of fixed sized | 
 | 45 |  * structs at a given time.  The structs are only copied if the user-specified | 
 | 46 |  * buffer is big enough.  The destination pages that make up the buffer | 
 | 47 |  * are pinned for the duration of the copy. | 
 | 48 |  * | 
 | 49 |  * This gives us the following benefits: | 
 | 50 |  * | 
 | 51 |  * - simple implementation, no copy "position" across multiple calls | 
 | 52 |  * - consistent snapshot of an info source | 
 | 53 |  * - atomic copy works well with whatever locking info source has | 
 | 54 |  * - one portable tool to get rds info across implementations | 
 | 55 |  * - long-lived tool can get info without allocating | 
 | 56 |  * | 
 | 57 |  * at the following costs: | 
 | 58 |  * | 
 | 59 |  * - info source copy must be pinned, may be "large" | 
 | 60 |  */ | 
 | 61 |  | 
 | 62 | struct rds_info_iterator { | 
 | 63 | 	struct page **pages; | 
 | 64 | 	void *addr; | 
 | 65 | 	unsigned long offset; | 
 | 66 | }; | 
 | 67 |  | 
 | 68 | static DEFINE_SPINLOCK(rds_info_lock); | 
 | 69 | static rds_info_func rds_info_funcs[RDS_INFO_LAST - RDS_INFO_FIRST + 1]; | 
 | 70 |  | 
 | 71 | void rds_info_register_func(int optname, rds_info_func func) | 
 | 72 | { | 
 | 73 | 	int offset = optname - RDS_INFO_FIRST; | 
 | 74 |  | 
 | 75 | 	BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); | 
 | 76 |  | 
 | 77 | 	spin_lock(&rds_info_lock); | 
 | 78 | 	BUG_ON(rds_info_funcs[offset] != NULL); | 
 | 79 | 	rds_info_funcs[offset] = func; | 
 | 80 | 	spin_unlock(&rds_info_lock); | 
 | 81 | } | 
 | 82 |  | 
 | 83 | void rds_info_deregister_func(int optname, rds_info_func func) | 
 | 84 | { | 
 | 85 | 	int offset = optname - RDS_INFO_FIRST; | 
 | 86 |  | 
 | 87 | 	BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); | 
 | 88 |  | 
 | 89 | 	spin_lock(&rds_info_lock); | 
 | 90 | 	BUG_ON(rds_info_funcs[offset] != func); | 
 | 91 | 	rds_info_funcs[offset] = NULL; | 
 | 92 | 	spin_unlock(&rds_info_lock); | 
 | 93 | } | 
 | 94 |  | 
 | 95 | /* | 
 | 96 |  * Typically we hold an atomic kmap across multiple rds_info_copy() calls | 
 | 97 |  * because the kmap is so expensive.  This must be called before using blocking | 
 | 98 |  * operations while holding the mapping and as the iterator is torn down. | 
 | 99 |  */ | 
 | 100 | void rds_info_iter_unmap(struct rds_info_iterator *iter) | 
 | 101 | { | 
 | 102 | 	if (iter->addr != NULL) { | 
 | 103 | 		kunmap_atomic(iter->addr, KM_USER0); | 
 | 104 | 		iter->addr = NULL; | 
 | 105 | 	} | 
 | 106 | } | 
 | 107 |  | 
 | 108 | /* | 
 | 109 |  * get_user_pages() called flush_dcache_page() on the pages for us. | 
 | 110 |  */ | 
 | 111 | void rds_info_copy(struct rds_info_iterator *iter, void *data, | 
 | 112 | 		   unsigned long bytes) | 
 | 113 | { | 
 | 114 | 	unsigned long this; | 
 | 115 |  | 
 | 116 | 	while (bytes) { | 
 | 117 | 		if (iter->addr == NULL) | 
 | 118 | 			iter->addr = kmap_atomic(*iter->pages, KM_USER0); | 
 | 119 |  | 
 | 120 | 		this = min(bytes, PAGE_SIZE - iter->offset); | 
 | 121 |  | 
 | 122 | 		rdsdebug("page %p addr %p offset %lu this %lu data %p " | 
 | 123 | 			  "bytes %lu\n", *iter->pages, iter->addr, | 
 | 124 | 			  iter->offset, this, data, bytes); | 
 | 125 |  | 
 | 126 | 		memcpy(iter->addr + iter->offset, data, this); | 
 | 127 |  | 
 | 128 | 		data += this; | 
 | 129 | 		bytes -= this; | 
 | 130 | 		iter->offset += this; | 
 | 131 |  | 
 | 132 | 		if (iter->offset == PAGE_SIZE) { | 
 | 133 | 			kunmap_atomic(iter->addr, KM_USER0); | 
 | 134 | 			iter->addr = NULL; | 
 | 135 | 			iter->offset = 0; | 
 | 136 | 			iter->pages++; | 
 | 137 | 		} | 
 | 138 | 	} | 
 | 139 | } | 
 | 140 |  | 
 | 141 | /* | 
 | 142 |  * @optval points to the userspace buffer that the information snapshot | 
 | 143 |  * will be copied into. | 
 | 144 |  * | 
 | 145 |  * @optlen on input is the size of the buffer in userspace.  @optlen | 
 | 146 |  * on output is the size of the requested snapshot in bytes. | 
 | 147 |  * | 
 | 148 |  * This function returns -errno if there is a failure, particularly -ENOSPC | 
 | 149 |  * if the given userspace buffer was not large enough to fit the snapshot. | 
 | 150 |  * On success it returns the positive number of bytes of each array element | 
 | 151 |  * in the snapshot. | 
 | 152 |  */ | 
 | 153 | int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, | 
 | 154 | 			int __user *optlen) | 
 | 155 | { | 
 | 156 | 	struct rds_info_iterator iter; | 
 | 157 | 	struct rds_info_lengths lens; | 
 | 158 | 	unsigned long nr_pages = 0; | 
 | 159 | 	unsigned long start; | 
 | 160 | 	unsigned long i; | 
 | 161 | 	rds_info_func func; | 
 | 162 | 	struct page **pages = NULL; | 
 | 163 | 	int ret; | 
 | 164 | 	int len; | 
 | 165 | 	int total; | 
 | 166 |  | 
 | 167 | 	if (get_user(len, optlen)) { | 
 | 168 | 		ret = -EFAULT; | 
 | 169 | 		goto out; | 
 | 170 | 	} | 
 | 171 |  | 
 | 172 | 	/* check for all kinds of wrapping and the like */ | 
 | 173 | 	start = (unsigned long)optval; | 
 | 174 | 	if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) { | 
 | 175 | 		ret = -EINVAL; | 
 | 176 | 		goto out; | 
 | 177 | 	} | 
 | 178 |  | 
 | 179 | 	/* a 0 len call is just trying to probe its length */ | 
 | 180 | 	if (len == 0) | 
 | 181 | 		goto call_func; | 
 | 182 |  | 
 | 183 | 	nr_pages = (PAGE_ALIGN(start + len) - (start & PAGE_MASK)) | 
 | 184 | 			>> PAGE_SHIFT; | 
 | 185 |  | 
 | 186 | 	pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); | 
 | 187 | 	if (pages == NULL) { | 
 | 188 | 		ret = -ENOMEM; | 
 | 189 | 		goto out; | 
 | 190 | 	} | 
| Andy Grover | 830eb7d | 2009-04-09 14:09:42 +0000 | [diff] [blame] | 191 | 	ret = get_user_pages_fast(start, nr_pages, 1, pages); | 
| Andy Grover | a8c879a | 2009-02-24 15:30:22 +0000 | [diff] [blame] | 192 | 	if (ret != nr_pages) { | 
 | 193 | 		if (ret > 0) | 
 | 194 | 			nr_pages = ret; | 
 | 195 | 		else | 
 | 196 | 			nr_pages = 0; | 
 | 197 | 		ret = -EAGAIN; /* XXX ? */ | 
 | 198 | 		goto out; | 
 | 199 | 	} | 
 | 200 |  | 
 | 201 | 	rdsdebug("len %d nr_pages %lu\n", len, nr_pages); | 
 | 202 |  | 
 | 203 | call_func: | 
 | 204 | 	func = rds_info_funcs[optname - RDS_INFO_FIRST]; | 
 | 205 | 	if (func == NULL) { | 
 | 206 | 		ret = -ENOPROTOOPT; | 
 | 207 | 		goto out; | 
 | 208 | 	} | 
 | 209 |  | 
 | 210 | 	iter.pages = pages; | 
 | 211 | 	iter.addr = NULL; | 
 | 212 | 	iter.offset = start & (PAGE_SIZE - 1); | 
 | 213 |  | 
 | 214 | 	func(sock, len, &iter, &lens); | 
 | 215 | 	BUG_ON(lens.each == 0); | 
 | 216 |  | 
 | 217 | 	total = lens.nr * lens.each; | 
 | 218 |  | 
 | 219 | 	rds_info_iter_unmap(&iter); | 
 | 220 |  | 
 | 221 | 	if (total > len) { | 
 | 222 | 		len = total; | 
 | 223 | 		ret = -ENOSPC; | 
 | 224 | 	} else { | 
 | 225 | 		len = total; | 
 | 226 | 		ret = lens.each; | 
 | 227 | 	} | 
 | 228 |  | 
 | 229 | 	if (put_user(len, optlen)) | 
 | 230 | 		ret = -EFAULT; | 
 | 231 |  | 
 | 232 | out: | 
 | 233 | 	for (i = 0; pages != NULL && i < nr_pages; i++) | 
 | 234 | 		put_page(pages[i]); | 
 | 235 | 	kfree(pages); | 
 | 236 |  | 
 | 237 | 	return ret; | 
 | 238 | } |