| Andy Grover | a8c879a | 2009-02-24 15:30:22 +0000 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright (c) 2006 Oracle.  All rights reserved. | 
|  | 3 | * | 
|  | 4 | * This software is available to you under a choice of one of two | 
|  | 5 | * licenses.  You may choose to be licensed under the terms of the GNU | 
|  | 6 | * General Public License (GPL) Version 2, available from the file | 
|  | 7 | * COPYING in the main directory of this source tree, or the | 
|  | 8 | * OpenIB.org BSD license below: | 
|  | 9 | * | 
|  | 10 | *     Redistribution and use in source and binary forms, with or | 
|  | 11 | *     without modification, are permitted provided that the following | 
|  | 12 | *     conditions are met: | 
|  | 13 | * | 
|  | 14 | *      - Redistributions of source code must retain the above | 
|  | 15 | *        copyright notice, this list of conditions and the following | 
|  | 16 | *        disclaimer. | 
|  | 17 | * | 
|  | 18 | *      - Redistributions in binary form must reproduce the above | 
|  | 19 | *        copyright notice, this list of conditions and the following | 
|  | 20 | *        disclaimer in the documentation and/or other materials | 
|  | 21 | *        provided with the distribution. | 
|  | 22 | * | 
|  | 23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | 
|  | 24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | 
|  | 25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | 
|  | 26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | 
|  | 27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | 
|  | 28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | 
|  | 29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | 
|  | 30 | * SOFTWARE. | 
|  | 31 | * | 
|  | 32 | */ | 
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>

#include "rds.h"
|  | 39 |  | 
|  | 40 | /* | 
|  | 41 | * This file implements a getsockopt() call which copies a set of fixed | 
|  | 42 | * sized structs into a user-specified buffer as a means of providing | 
|  | 43 | * read-only information about RDS. | 
|  | 44 | * | 
|  | 45 | * For a given information source there are a given number of fixed sized | 
|  | 46 | * structs at a given time.  The structs are only copied if the user-specified | 
|  | 47 | * buffer is big enough.  The destination pages that make up the buffer | 
|  | 48 | * are pinned for the duration of the copy. | 
|  | 49 | * | 
|  | 50 | * This gives us the following benefits: | 
|  | 51 | * | 
|  | 52 | * - simple implementation, no copy "position" across multiple calls | 
|  | 53 | * - consistent snapshot of an info source | 
|  | 54 | * - atomic copy works well with whatever locking info source has | 
|  | 55 | * - one portable tool to get rds info across implementations | 
|  | 56 | * - long-lived tool can get info without allocating | 
|  | 57 | * | 
|  | 58 | * at the following costs: | 
|  | 59 | * | 
|  | 60 | * - info source copy must be pinned, may be "large" | 
|  | 61 | */ | 
|  | 62 |  | 
/*
 * Write cursor used by rds_info_copy() to fill the destination pages of
 * one info request.
 */
struct rds_info_iterator {
	/* points at the array entry for the page currently being filled */
	struct page **pages;
	/* atomic kmap of *pages, or NULL while no mapping is held */
	void *addr;
	/* byte offset inside *pages where the next copy will land */
	unsigned long offset;
};
|  | 68 |  | 
|  | 69 | static DEFINE_SPINLOCK(rds_info_lock); | 
|  | 70 | static rds_info_func rds_info_funcs[RDS_INFO_LAST - RDS_INFO_FIRST + 1]; | 
|  | 71 |  | 
|  | 72 | void rds_info_register_func(int optname, rds_info_func func) | 
|  | 73 | { | 
|  | 74 | int offset = optname - RDS_INFO_FIRST; | 
|  | 75 |  | 
|  | 76 | BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); | 
|  | 77 |  | 
|  | 78 | spin_lock(&rds_info_lock); | 
|  | 79 | BUG_ON(rds_info_funcs[offset] != NULL); | 
|  | 80 | rds_info_funcs[offset] = func; | 
|  | 81 | spin_unlock(&rds_info_lock); | 
|  | 82 | } | 
| Andy Grover | 616b757 | 2009-08-21 12:28:32 +0000 | [diff] [blame] | 83 | EXPORT_SYMBOL_GPL(rds_info_register_func); | 
| Andy Grover | a8c879a | 2009-02-24 15:30:22 +0000 | [diff] [blame] | 84 |  | 
|  | 85 | void rds_info_deregister_func(int optname, rds_info_func func) | 
|  | 86 | { | 
|  | 87 | int offset = optname - RDS_INFO_FIRST; | 
|  | 88 |  | 
|  | 89 | BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); | 
|  | 90 |  | 
|  | 91 | spin_lock(&rds_info_lock); | 
|  | 92 | BUG_ON(rds_info_funcs[offset] != func); | 
|  | 93 | rds_info_funcs[offset] = NULL; | 
|  | 94 | spin_unlock(&rds_info_lock); | 
|  | 95 | } | 
| Andy Grover | 616b757 | 2009-08-21 12:28:32 +0000 | [diff] [blame] | 96 | EXPORT_SYMBOL_GPL(rds_info_deregister_func); | 
| Andy Grover | a8c879a | 2009-02-24 15:30:22 +0000 | [diff] [blame] | 97 |  | 
|  | 98 | /* | 
|  | 99 | * Typically we hold an atomic kmap across multiple rds_info_copy() calls | 
|  | 100 | * because the kmap is so expensive.  This must be called before using blocking | 
|  | 101 | * operations while holding the mapping and as the iterator is torn down. | 
|  | 102 | */ | 
|  | 103 | void rds_info_iter_unmap(struct rds_info_iterator *iter) | 
|  | 104 | { | 
|  | 105 | if (iter->addr != NULL) { | 
|  | 106 | kunmap_atomic(iter->addr, KM_USER0); | 
|  | 107 | iter->addr = NULL; | 
|  | 108 | } | 
|  | 109 | } | 
|  | 110 |  | 
|  | 111 | /* | 
|  | 112 | * get_user_pages() called flush_dcache_page() on the pages for us. | 
|  | 113 | */ | 
|  | 114 | void rds_info_copy(struct rds_info_iterator *iter, void *data, | 
|  | 115 | unsigned long bytes) | 
|  | 116 | { | 
|  | 117 | unsigned long this; | 
|  | 118 |  | 
|  | 119 | while (bytes) { | 
|  | 120 | if (iter->addr == NULL) | 
|  | 121 | iter->addr = kmap_atomic(*iter->pages, KM_USER0); | 
|  | 122 |  | 
|  | 123 | this = min(bytes, PAGE_SIZE - iter->offset); | 
|  | 124 |  | 
|  | 125 | rdsdebug("page %p addr %p offset %lu this %lu data %p " | 
|  | 126 | "bytes %lu\n", *iter->pages, iter->addr, | 
|  | 127 | iter->offset, this, data, bytes); | 
|  | 128 |  | 
|  | 129 | memcpy(iter->addr + iter->offset, data, this); | 
|  | 130 |  | 
|  | 131 | data += this; | 
|  | 132 | bytes -= this; | 
|  | 133 | iter->offset += this; | 
|  | 134 |  | 
|  | 135 | if (iter->offset == PAGE_SIZE) { | 
|  | 136 | kunmap_atomic(iter->addr, KM_USER0); | 
|  | 137 | iter->addr = NULL; | 
|  | 138 | iter->offset = 0; | 
|  | 139 | iter->pages++; | 
|  | 140 | } | 
|  | 141 | } | 
|  | 142 | } | 
| Andy Grover | 616b757 | 2009-08-21 12:28:32 +0000 | [diff] [blame] | 143 | EXPORT_SYMBOL_GPL(rds_info_copy); | 
| Andy Grover | a8c879a | 2009-02-24 15:30:22 +0000 | [diff] [blame] | 144 |  | 
|  | 145 | /* | 
|  | 146 | * @optval points to the userspace buffer that the information snapshot | 
|  | 147 | * will be copied into. | 
|  | 148 | * | 
|  | 149 | * @optlen on input is the size of the buffer in userspace.  @optlen | 
|  | 150 | * on output is the size of the requested snapshot in bytes. | 
|  | 151 | * | 
|  | 152 | * This function returns -errno if there is a failure, particularly -ENOSPC | 
|  | 153 | * if the given userspace buffer was not large enough to fit the snapshot. | 
|  | 154 | * On success it returns the positive number of bytes of each array element | 
|  | 155 | * in the snapshot. | 
|  | 156 | */ | 
|  | 157 | int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, | 
|  | 158 | int __user *optlen) | 
|  | 159 | { | 
|  | 160 | struct rds_info_iterator iter; | 
|  | 161 | struct rds_info_lengths lens; | 
|  | 162 | unsigned long nr_pages = 0; | 
|  | 163 | unsigned long start; | 
|  | 164 | unsigned long i; | 
|  | 165 | rds_info_func func; | 
|  | 166 | struct page **pages = NULL; | 
|  | 167 | int ret; | 
|  | 168 | int len; | 
|  | 169 | int total; | 
|  | 170 |  | 
|  | 171 | if (get_user(len, optlen)) { | 
|  | 172 | ret = -EFAULT; | 
|  | 173 | goto out; | 
|  | 174 | } | 
|  | 175 |  | 
|  | 176 | /* check for all kinds of wrapping and the like */ | 
|  | 177 | start = (unsigned long)optval; | 
|  | 178 | if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) { | 
|  | 179 | ret = -EINVAL; | 
|  | 180 | goto out; | 
|  | 181 | } | 
|  | 182 |  | 
|  | 183 | /* a 0 len call is just trying to probe its length */ | 
|  | 184 | if (len == 0) | 
|  | 185 | goto call_func; | 
|  | 186 |  | 
|  | 187 | nr_pages = (PAGE_ALIGN(start + len) - (start & PAGE_MASK)) | 
|  | 188 | >> PAGE_SHIFT; | 
|  | 189 |  | 
|  | 190 | pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); | 
|  | 191 | if (pages == NULL) { | 
|  | 192 | ret = -ENOMEM; | 
|  | 193 | goto out; | 
|  | 194 | } | 
| Andy Grover | 830eb7d | 2009-04-09 14:09:42 +0000 | [diff] [blame] | 195 | ret = get_user_pages_fast(start, nr_pages, 1, pages); | 
| Andy Grover | a8c879a | 2009-02-24 15:30:22 +0000 | [diff] [blame] | 196 | if (ret != nr_pages) { | 
|  | 197 | if (ret > 0) | 
|  | 198 | nr_pages = ret; | 
|  | 199 | else | 
|  | 200 | nr_pages = 0; | 
|  | 201 | ret = -EAGAIN; /* XXX ? */ | 
|  | 202 | goto out; | 
|  | 203 | } | 
|  | 204 |  | 
|  | 205 | rdsdebug("len %d nr_pages %lu\n", len, nr_pages); | 
|  | 206 |  | 
|  | 207 | call_func: | 
|  | 208 | func = rds_info_funcs[optname - RDS_INFO_FIRST]; | 
|  | 209 | if (func == NULL) { | 
|  | 210 | ret = -ENOPROTOOPT; | 
|  | 211 | goto out; | 
|  | 212 | } | 
|  | 213 |  | 
|  | 214 | iter.pages = pages; | 
|  | 215 | iter.addr = NULL; | 
|  | 216 | iter.offset = start & (PAGE_SIZE - 1); | 
|  | 217 |  | 
|  | 218 | func(sock, len, &iter, &lens); | 
|  | 219 | BUG_ON(lens.each == 0); | 
|  | 220 |  | 
|  | 221 | total = lens.nr * lens.each; | 
|  | 222 |  | 
|  | 223 | rds_info_iter_unmap(&iter); | 
|  | 224 |  | 
|  | 225 | if (total > len) { | 
|  | 226 | len = total; | 
|  | 227 | ret = -ENOSPC; | 
|  | 228 | } else { | 
|  | 229 | len = total; | 
|  | 230 | ret = lens.each; | 
|  | 231 | } | 
|  | 232 |  | 
|  | 233 | if (put_user(len, optlen)) | 
|  | 234 | ret = -EFAULT; | 
|  | 235 |  | 
|  | 236 | out: | 
|  | 237 | for (i = 0; pages != NULL && i < nr_pages; i++) | 
|  | 238 | put_page(pages[i]); | 
|  | 239 | kfree(pages); | 
|  | 240 |  | 
|  | 241 | return ret; | 
|  | 242 | } |