/*
 * Performance events ring-buffer code:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *  Copyright  ©  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 *
 * For licensing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>

#include "internal.h"

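/*
 * Return false when writing the range [offset, head) would pass the
 * user-visible data_tail, i.e. overwrite data that user space has not
 * consumed yet.  Buffers that were not mapped writable have no consumer
 * feedback and are simply overwritten, so they always have space.
 */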
static bool perf_output_space(struct ring_buffer *rb, unsigned long tail,
			      unsigned long offset, unsigned long head)
{
	unsigned long mask;

	if (!rb->writable)
		return true;

	mask = perf_data_size(rb) - 1;

	offset = (offset - tail) & mask;
	head   = (head   - tail) & mask;

	if ((int)(head - offset) < 0)
		return false;

	return true;
}

static void perf_output_wakeup(struct perf_output_handle *handle)
{
	atomic_set(&handle->rb->poll, POLL_IN);

	handle->event->pending_wakeup = 1;
	irq_work_queue(&handle->event->pending);
}

/*
 * We need to ensure a later event_id doesn't publish a head when a former
 * event isn't done writing.  However, since we need to deal with NMIs, we
 * cannot fully serialize things.
 *
 * We only publish the head (and generate a wakeup) when the outer-most
 * event completes.
 */
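/*
 * Illustrative interleaving of the nest counter (not exhaustive):
 *
 *   task:  perf_output_get_handle()	nest == 1
 *     NMI: perf_output_get_handle()	nest == 2
 *          ... writes its record ...
 *          perf_output_put_handle()	nest == 1, head not published
 *   task:  ... finishes its record ...
 *          perf_output_put_handle()	nest == 0, publishes the latest
 *					rb->head, covering the NMI's data too
 */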
static void perf_output_get_handle(struct perf_output_handle *handle)
{
	struct ring_buffer *rb = handle->rb;

	preempt_disable();
	local_inc(&rb->nest);
	handle->wakeup = local_read(&rb->wakeup);
}

static void perf_output_put_handle(struct perf_output_handle *handle)
{
	struct ring_buffer *rb = handle->rb;
	unsigned long head;

again:
	head = local_read(&rb->head);

	/*
	 * IRQ/NMI can happen here, which means we can miss a head update.
	 */

	if (!local_dec_and_test(&rb->nest))
		goto out;

	/*
	 * Publish the known good head.  Rely on the full barrier implied
	 * by the above local_dec_and_test() to order the rb->head read
	 * and this write.
	 */
	rb->user_page->data_head = head;

	/*
	 * Now check if we missed an update; rely on the (compiler)
	 * barrier in local_dec_and_test() to re-read rb->head.
	 */
	if (unlikely(head != local_read(&rb->head))) {
		local_inc(&rb->nest);
		goto again;
	}

	if (handle->wakeup != local_read(&rb->wakeup))
		perf_output_wakeup(handle);

out:
	preempt_enable();
}

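/*
 * perf_output_begin - reserve @size bytes of output space for @event
 *
 * Reserves space by advancing rb->head in a cmpxchg loop, honouring the
 * user-visible data_tail on writable buffers so unread data is not
 * overwritten, and emits a PERF_RECORD_LOST record first if earlier
 * output had to be dropped.
 *
 * Must be paired with perf_output_end(), which drops the RCU read lock
 * taken here.  Returns 0 on success, -ENOSPC when there is no room (in
 * which case rb->lost is incremented).
 */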
int perf_output_begin(struct perf_output_handle *handle,
		      struct perf_event *event, unsigned int size)
{
	struct ring_buffer *rb;
	unsigned long tail, offset, head;
	int have_lost;
	struct perf_sample_data sample_data;
	struct {
		struct perf_event_header header;
		u64			 id;
		u64			 lost;
	} lost_event;

	rcu_read_lock();
	/*
	 * For inherited events we send all the output towards the parent.
	 */
	if (event->parent)
		event = event->parent;

	rb = rcu_dereference(event->rb);
	if (!rb)
		goto out;

	handle->rb	= rb;
	handle->event	= event;

	if (!rb->nr_pages)
		goto out;

	have_lost = local_read(&rb->lost);
	if (have_lost) {
		lost_event.header.size = sizeof(lost_event);
		perf_event_header__init_id(&lost_event.header, &sample_data,
					   event);
		size += lost_event.header.size;
	}

	perf_output_get_handle(handle);

	do {
		/*
		 * Userspace could choose to issue an mb() before updating
		 * the tail pointer, so that all reads complete before the
		 * write is issued.
		 */
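		/*
		 * Illustrative sketch (not part of this file) of the matching
		 * user-space reader; "base" is a hypothetical pointer to the
		 * mmap()ed perf_event_mmap_page:
		 *
		 *   head = base->data_head;
		 *   rmb();			read data_head before the records
		 *   ... consume records in [base->data_tail, head) ...
		 *   mb();			finish all reads, then free space
		 *   base->data_tail = head;
		 *
		 * The data_tail value written there is what the kernel reads
		 * back via ACCESS_ONCE() below before reusing that space.
		 */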
		tail = ACCESS_ONCE(rb->user_page->data_tail);
		smp_rmb();
		offset = head = local_read(&rb->head);
		head += size;
		if (unlikely(!perf_output_space(rb, tail, offset, head)))
			goto fail;
	} while (local_cmpxchg(&rb->head, offset, head) != offset);

	if (head - local_read(&rb->wakeup) > rb->watermark)
		local_add(rb->watermark, &rb->wakeup);

	handle->page = offset >> (PAGE_SHIFT + page_order(rb));
	handle->page &= rb->nr_pages - 1;
	handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1);
	handle->addr = rb->data_pages[handle->page];
	handle->addr += handle->size;
	handle->size = (PAGE_SIZE << page_order(rb)) - handle->size;

	if (have_lost) {
		lost_event.header.type = PERF_RECORD_LOST;
		lost_event.header.misc = 0;
		lost_event.id          = event->id;
		lost_event.lost        = local_xchg(&rb->lost, 0);

		perf_output_put(handle, lost_event);
		perf_event__output_id_sample(event, handle, &sample_data);
	}

	return 0;

fail:
	local_inc(&rb->lost);
	perf_output_put_handle(handle);
out:
	rcu_read_unlock();

	return -ENOSPC;
}

unsigned int perf_output_copy(struct perf_output_handle *handle,
			      const void *buf, unsigned int len)
{
	return __output_copy(handle, buf, len);
}

unsigned int perf_output_skip(struct perf_output_handle *handle,
			      unsigned int len)
{
	return __output_skip(handle, NULL, len);
}

void perf_output_end(struct perf_output_handle *handle)
{
	perf_output_put_handle(handle);
	rcu_read_unlock();
}

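/*
 * Common initialization for both backing schemes: the wakeup watermark
 * defaults to half the buffer and is capped at the buffer size, and
 * rb->writable (which enables the data_tail check in perf_output_space())
 * is only set for RING_BUFFER_WRITABLE mappings.
 */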
static void
ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
{
	long max_size = perf_data_size(rb);

	if (watermark)
		rb->watermark = min(max_size, watermark);

	if (!rb->watermark)
		rb->watermark = max_size / 2;

	if (flags & RING_BUFFER_WRITABLE)
		rb->writable = 1;

	atomic_set(&rb->refcount, 1);

	INIT_LIST_HEAD(&rb->event_list);
	spin_lock_init(&rb->event_lock);
}

#ifndef CONFIG_PERF_USE_VMALLOC

/*
 * Back perf_mmap() with regular GFP_KERNEL order-0 pages.
 */

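/*
 * pgoff 0 of the mapping is the user control page (struct
 * perf_event_mmap_page); pgoff 1..nr_pages are the data pages.
 */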
struct page *
perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
{
	if (pgoff > rb->nr_pages)
		return NULL;

	if (pgoff == 0)
		return virt_to_page(rb->user_page);

	return virt_to_page(rb->data_pages[pgoff - 1]);
}

static void *perf_mmap_alloc_page(int cpu)
{
	struct page *page;
	int node;

	node = (cpu == -1) ? cpu : cpu_to_node(cpu);
	page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
	if (!page)
		return NULL;

	return page_address(page);
}

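/*
 * Allocate the ring_buffer descriptor together with its trailing
 * data_pages[] array, then the user control page and nr_pages data
 * pages, each as a separate order-0 page on the node of @cpu (any
 * node when cpu == -1).
 */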
struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
{
	struct ring_buffer *rb;
	unsigned long size;
	int i;

	size = sizeof(struct ring_buffer);
	size += nr_pages * sizeof(void *);

	rb = kzalloc(size, GFP_KERNEL);
	if (!rb)
		goto fail;

	rb->user_page = perf_mmap_alloc_page(cpu);
	if (!rb->user_page)
		goto fail_user_page;

	for (i = 0; i < nr_pages; i++) {
		rb->data_pages[i] = perf_mmap_alloc_page(cpu);
		if (!rb->data_pages[i])
			goto fail_data_pages;
	}

	rb->nr_pages = nr_pages;

	ring_buffer_init(rb, watermark, flags);

	return rb;

fail_data_pages:
	for (i--; i >= 0; i--)
		free_page((unsigned long)rb->data_pages[i]);

	free_page((unsigned long)rb->user_page);

fail_user_page:
	kfree(rb);

fail:
	return NULL;
}

static void perf_mmap_free_page(unsigned long addr)
{
	struct page *page = virt_to_page((void *)addr);

	page->mapping = NULL;
	__free_page(page);
}

void rb_free(struct ring_buffer *rb)
{
	int i;

	perf_mmap_free_page((unsigned long)rb->user_page);
	for (i = 0; i < rb->nr_pages; i++)
		perf_mmap_free_page((unsigned long)rb->data_pages[i]);
	kfree(rb);
}

#else

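/*
 * Back perf_mmap() with a single, virtually contiguous vmalloc_user()
 * allocation holding the control page plus all data pages.  As far as
 * the rest of the code is concerned there is one data "page" of order
 * page_order(rb), hence rb->nr_pages == 1 below.  (This backing is
 * used, e.g., by architectures with D-cache aliasing constraints.)
 */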
struct page *
perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
{
	if (pgoff > (1UL << page_order(rb)))
		return NULL;

	return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
}

static void perf_mmap_unmark_page(void *addr)
{
	struct page *page = vmalloc_to_page(addr);

	page->mapping = NULL;
}

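/*
 * vfree() may not be called from atomic context, so the actual release
 * of the buffer is deferred to process context via a workqueue; see
 * rb_free() below, which only schedules this work.
 */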
static void rb_free_work(struct work_struct *work)
{
	struct ring_buffer *rb;
	void *base;
	int i, nr;

	rb = container_of(work, struct ring_buffer, work);
	nr = 1 << page_order(rb);

	base = rb->user_page;
	for (i = 0; i < nr + 1; i++)
		perf_mmap_unmark_page(base + (i * PAGE_SIZE));

	vfree(base);
	kfree(rb);
}

void rb_free(struct ring_buffer *rb)
{
	schedule_work(&rb->work);
}

struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
{
	struct ring_buffer *rb;
	unsigned long size;
	void *all_buf;

	size = sizeof(struct ring_buffer);
	size += sizeof(void *);

	rb = kzalloc(size, GFP_KERNEL);
	if (!rb)
		goto fail;

	INIT_WORK(&rb->work, rb_free_work);

	all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
	if (!all_buf)
		goto fail_all_buf;

	rb->user_page = all_buf;
	rb->data_pages[0] = all_buf + PAGE_SIZE;
	rb->page_order = ilog2(nr_pages);
	rb->nr_pages = 1;

	ring_buffer_init(rb, watermark, flags);

	return rb;

fail_all_buf:
	kfree(rb);

fail:
	return NULL;
}

#endif