Thomas Gleixner0793a612008-12-04 20:12:29 +01001/*
2 * Performance counter core code
3 *
Ingo Molnar98144512009-04-29 14:52:50 +02004 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
Paul Mackerrasc5dd0162009-04-30 09:48:16 +10007 * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
Peter Zijlstra7b732a72009-03-23 18:22:10 +01008 *
9 * For licensing details see kernel-base/COPYING
Thomas Gleixner0793a612008-12-04 20:12:29 +010010 */
11
12#include <linux/fs.h>
Peter Zijlstrab9cacc72009-03-25 12:30:22 +010013#include <linux/mm.h>
Thomas Gleixner0793a612008-12-04 20:12:29 +010014#include <linux/cpu.h>
15#include <linux/smp.h>
Ingo Molnar04289bb2008-12-11 08:38:42 +010016#include <linux/file.h>
Thomas Gleixner0793a612008-12-04 20:12:29 +010017#include <linux/poll.h>
18#include <linux/sysfs.h>
19#include <linux/ptrace.h>
20#include <linux/percpu.h>
Peter Zijlstrab9cacc72009-03-25 12:30:22 +010021#include <linux/vmstat.h>
22#include <linux/hardirq.h>
23#include <linux/rculist.h>
Thomas Gleixner0793a612008-12-04 20:12:29 +010024#include <linux/uaccess.h>
25#include <linux/syscalls.h>
26#include <linux/anon_inodes.h>
Ingo Molnaraa9c4c02008-12-17 14:10:57 +010027#include <linux/kernel_stat.h>
Thomas Gleixner0793a612008-12-04 20:12:29 +010028#include <linux/perf_counter.h>
Peter Zijlstra0a4a9392009-03-30 19:07:05 +020029#include <linux/dcache.h>
Thomas Gleixner0793a612008-12-04 20:12:29 +010030
Tim Blechmann4e193bd2009-03-14 14:29:25 +010031#include <asm/irq_regs.h>
32
Thomas Gleixner0793a612008-12-04 20:12:29 +010033/*
34 * Each CPU has a list of per CPU counters:
35 */
36DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
37
Ingo Molnar088e2852008-12-14 20:21:00 +010038int perf_max_counters __read_mostly = 1;
Thomas Gleixner0793a612008-12-04 20:12:29 +010039static int perf_reserved_percpu __read_mostly;
40static int perf_overcommit __read_mostly = 1;
41
Peter Zijlstra7fc23a52009-05-08 18:52:21 +020042static atomic_t nr_counters __read_mostly;
Peter Zijlstra9ee318a2009-04-09 10:53:44 +020043static atomic_t nr_mmap_tracking __read_mostly;
44static atomic_t nr_munmap_tracking __read_mostly;
45static atomic_t nr_comm_tracking __read_mostly;
46
Peter Zijlstra1ccd1542009-04-09 10:53:45 +020047int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
Peter Zijlstra789f90f2009-05-15 15:19:27 +020048int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
Peter Zijlstraa78ac322009-05-25 17:39:05 +020049int sysctl_perf_counter_limit __read_mostly = 100000; /* max NMIs per second */
Peter Zijlstra1ccd1542009-04-09 10:53:45 +020050
Thomas Gleixner0793a612008-12-04 20:12:29 +010051/*
Ingo Molnar1dce8d92009-05-04 19:23:18 +020052 * Lock for (sysadmin-configurable) counter reservations:
Thomas Gleixner0793a612008-12-04 20:12:29 +010053 */
Ingo Molnar1dce8d92009-05-04 19:23:18 +020054static DEFINE_SPINLOCK(perf_resource_lock);
Thomas Gleixner0793a612008-12-04 20:12:29 +010055
56/*
57 * Architecture provided APIs - weak aliases:
58 */
Robert Richter4aeb0b42009-04-29 12:47:03 +020059extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
Thomas Gleixner0793a612008-12-04 20:12:29 +010060{
Paul Mackerrasff6f0542009-01-09 16:19:25 +110061 return NULL;
Thomas Gleixner0793a612008-12-04 20:12:29 +010062}
63
Peter Zijlstra9e35ad32009-05-13 16:21:38 +020064void __weak hw_perf_disable(void) { barrier(); }
65void __weak hw_perf_enable(void) { barrier(); }
66
Paul Mackerras01d02872009-01-14 13:44:19 +110067void __weak hw_perf_counter_setup(int cpu) { barrier(); }
Paul Mackerras3cbed422009-01-09 16:43:42 +110068int __weak hw_perf_group_sched_in(struct perf_counter *group_leader,
69 struct perf_cpu_context *cpuctx,
70 struct perf_counter_context *ctx, int cpu)
71{
72 return 0;
73}
Thomas Gleixner0793a612008-12-04 20:12:29 +010074
Paul Mackerras4eb96fc2009-01-09 17:24:34 +110075void __weak perf_counter_print_debug(void) { }
76
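/*
 * perf_disable()/perf_enable() nest via a per-cpu count: every
 * perf_disable() calls hw_perf_disable(), while hw_perf_enable() is
 * only called once the outermost perf_enable() brings the count
 * back to zero.
 */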
Peter Zijlstra9e35ad32009-05-13 16:21:38 +020077static DEFINE_PER_CPU(int, disable_count);
78
79void __perf_disable(void)
80{
81 __get_cpu_var(disable_count)++;
82}
83
84bool __perf_enable(void)
85{
86 return !--__get_cpu_var(disable_count);
87}
88
89void perf_disable(void)
90{
91 __perf_disable();
92 hw_perf_disable();
93}
Peter Zijlstra9e35ad32009-05-13 16:21:38 +020094
95void perf_enable(void)
96{
97 if (__perf_enable())
98 hw_perf_enable();
99}
Peter Zijlstra9e35ad32009-05-13 16:21:38 +0200100
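/*
 * Reference counting for counter contexts: get_ctx() takes a
 * reference, put_ctx() drops one and, on the last reference, also
 * drops the reference held on the parent context (if any) before
 * freeing the context itself.
 */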
Paul Mackerrasa63eaf32009-05-22 14:17:31 +1000101static void get_ctx(struct perf_counter_context *ctx)
102{
103 atomic_inc(&ctx->refcount);
104}
105
106static void put_ctx(struct perf_counter_context *ctx)
107{
Paul Mackerras564c2b22009-05-22 14:27:22 +1000108 if (atomic_dec_and_test(&ctx->refcount)) {
109 if (ctx->parent_ctx)
110 put_ctx(ctx->parent_ctx);
Paul Mackerrasa63eaf32009-05-22 14:17:31 +1000111 kfree(ctx);
Paul Mackerras564c2b22009-05-22 14:27:22 +1000112 }
Paul Mackerrasa63eaf32009-05-22 14:17:31 +1000113}
114
Peter Zijlstrafccc7142009-05-23 18:28:56 +0200115/*
 116 * Add a counter to the lists for its context.
117 * Must be called with ctx->mutex and ctx->lock held.
118 */
Ingo Molnar04289bb2008-12-11 08:38:42 +0100119static void
120list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
121{
122 struct perf_counter *group_leader = counter->group_leader;
123
124 /*
125 * Depending on whether it is a standalone or sibling counter,
126 * add it straight to the context's counter list, or to the group
127 * leader's sibling list:
128 */
Peter Zijlstra3df5eda2009-05-08 18:52:22 +0200129 if (group_leader == counter)
Ingo Molnar04289bb2008-12-11 08:38:42 +0100130 list_add_tail(&counter->list_entry, &ctx->counter_list);
Peter Zijlstra5c148192009-03-25 12:30:23 +0100131 else {
Ingo Molnar04289bb2008-12-11 08:38:42 +0100132 list_add_tail(&counter->list_entry, &group_leader->sibling_list);
Peter Zijlstra5c148192009-03-25 12:30:23 +0100133 group_leader->nr_siblings++;
134 }
Peter Zijlstra592903c2009-03-13 12:21:36 +0100135
136 list_add_rcu(&counter->event_entry, &ctx->event_list);
Peter Zijlstra8bc20952009-05-15 20:45:59 +0200137 ctx->nr_counters++;
Ingo Molnar04289bb2008-12-11 08:38:42 +0100138}
139
Paul Mackerrasa63eaf32009-05-22 14:17:31 +1000140/*
141 * Remove a counter from the lists for its context.
Peter Zijlstrafccc7142009-05-23 18:28:56 +0200142 * Must be called with ctx->mutex and ctx->lock held.
Paul Mackerrasa63eaf32009-05-22 14:17:31 +1000143 */
Ingo Molnar04289bb2008-12-11 08:38:42 +0100144static void
145list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
146{
147 struct perf_counter *sibling, *tmp;
148
Paul Mackerrasa63eaf32009-05-22 14:17:31 +1000149 if (list_empty(&counter->list_entry))
150 return;
Peter Zijlstra8bc20952009-05-15 20:45:59 +0200151 ctx->nr_counters--;
152
Ingo Molnar04289bb2008-12-11 08:38:42 +0100153 list_del_init(&counter->list_entry);
Peter Zijlstra592903c2009-03-13 12:21:36 +0100154 list_del_rcu(&counter->event_entry);
Ingo Molnar04289bb2008-12-11 08:38:42 +0100155
Peter Zijlstra5c148192009-03-25 12:30:23 +0100156 if (counter->group_leader != counter)
157 counter->group_leader->nr_siblings--;
158
Ingo Molnar04289bb2008-12-11 08:38:42 +0100159 /*
160 * If this was a group counter with sibling counters then
161 * upgrade the siblings to singleton counters by adding them
162 * to the context list directly:
163 */
164 list_for_each_entry_safe(sibling, tmp,
165 &counter->sibling_list, list_entry) {
166
Peter Zijlstra75564232009-03-13 12:21:29 +0100167 list_move_tail(&sibling->list_entry, &ctx->counter_list);
Ingo Molnar04289bb2008-12-11 08:38:42 +0100168 sibling->group_leader = sibling;
169 }
170}
171
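/*
 * Take an ACTIVE counter off the PMU: mark it INACTIVE, record the
 * stop timestamp, call the pmu disable method and update the
 * active/exclusive bookkeeping in the cpu and counter contexts.
 */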
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100172static void
173counter_sched_out(struct perf_counter *counter,
174 struct perf_cpu_context *cpuctx,
175 struct perf_counter_context *ctx)
176{
177 if (counter->state != PERF_COUNTER_STATE_ACTIVE)
178 return;
179
180 counter->state = PERF_COUNTER_STATE_INACTIVE;
Peter Zijlstra4af49982009-04-06 11:45:10 +0200181 counter->tstamp_stopped = ctx->time;
Robert Richter4aeb0b42009-04-29 12:47:03 +0200182 counter->pmu->disable(counter);
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100183 counter->oncpu = -1;
184
185 if (!is_software_counter(counter))
186 cpuctx->active_oncpu--;
187 ctx->nr_active--;
188 if (counter->hw_event.exclusive || !cpuctx->active_oncpu)
189 cpuctx->exclusive = 0;
190}
191
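/*
 * Schedule out a whole counter group: the group leader first, then
 * each sibling, clearing the cpu's exclusive flag if the group was
 * exclusive.
 */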
Paul Mackerrasd859e292009-01-17 18:10:22 +1100192static void
193group_sched_out(struct perf_counter *group_counter,
194 struct perf_cpu_context *cpuctx,
195 struct perf_counter_context *ctx)
196{
197 struct perf_counter *counter;
198
199 if (group_counter->state != PERF_COUNTER_STATE_ACTIVE)
200 return;
201
202 counter_sched_out(group_counter, cpuctx, ctx);
203
204 /*
205 * Schedule out siblings (if any):
206 */
207 list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
208 counter_sched_out(counter, cpuctx, ctx);
209
210 if (group_counter->hw_event.exclusive)
211 cpuctx->exclusive = 0;
212}
213
Thomas Gleixner0793a612008-12-04 20:12:29 +0100214/*
Paul Mackerras564c2b22009-05-22 14:27:22 +1000215 * Mark this context as not being a clone of another.
216 * Called when counters are added to or removed from this context.
217 * We also increment our generation number so that anything that
218 * was cloned from this context before this will not match anything
219 * cloned from this context after this.
220 */
221static void unclone_ctx(struct perf_counter_context *ctx)
222{
223 ++ctx->generation;
224 if (!ctx->parent_ctx)
225 return;
226 put_ctx(ctx->parent_ctx);
227 ctx->parent_ctx = NULL;
228}
229
230/*
Thomas Gleixner0793a612008-12-04 20:12:29 +0100231 * Cross CPU call to remove a performance counter
232 *
233 * We disable the counter on the hardware level first. After that we
234 * remove it from the context list.
235 */
Ingo Molnar04289bb2008-12-11 08:38:42 +0100236static void __perf_counter_remove_from_context(void *info)
Thomas Gleixner0793a612008-12-04 20:12:29 +0100237{
238 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
239 struct perf_counter *counter = info;
240 struct perf_counter_context *ctx = counter->ctx;
Ingo Molnar9b51f662008-12-12 13:49:45 +0100241 unsigned long flags;
Thomas Gleixner0793a612008-12-04 20:12:29 +0100242
243 /*
244 * If this is a task context, we need to check whether it is
245 * the current task context of this cpu. If not it has been
246 * scheduled out before the smp call arrived.
247 */
248 if (ctx->task && cpuctx->task_ctx != ctx)
249 return;
250
Peter Zijlstra849691a2009-04-06 11:45:12 +0200251 spin_lock_irqsave(&ctx->lock, flags);
Ingo Molnar34adc802009-05-20 20:13:28 +0200252 /*
253 * Protect the list operation against NMI by disabling the
254 * counters on a global level.
255 */
256 perf_disable();
Thomas Gleixner0793a612008-12-04 20:12:29 +0100257
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100258 counter_sched_out(counter, cpuctx, ctx);
259
Ingo Molnar04289bb2008-12-11 08:38:42 +0100260 list_del_counter(counter, ctx);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100261
262 if (!ctx->task) {
263 /*
264 * Allow more per task counters with respect to the
265 * reservation:
266 */
267 cpuctx->max_pertask =
268 min(perf_max_counters - ctx->nr_counters,
269 perf_max_counters - perf_reserved_percpu);
270 }
271
Ingo Molnar34adc802009-05-20 20:13:28 +0200272 perf_enable();
Peter Zijlstra849691a2009-04-06 11:45:12 +0200273 spin_unlock_irqrestore(&ctx->lock, flags);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100274}
275
276
277/*
278 * Remove the counter from a task's (or a CPU's) list of counters.
279 *
Peter Zijlstrafccc7142009-05-23 18:28:56 +0200280 * Must be called with ctx->mutex held.
Thomas Gleixner0793a612008-12-04 20:12:29 +0100281 *
282 * CPU counters are removed with a smp call. For task counters we only
283 * call when the task is on a CPU.
284 */
Ingo Molnar04289bb2008-12-11 08:38:42 +0100285static void perf_counter_remove_from_context(struct perf_counter *counter)
Thomas Gleixner0793a612008-12-04 20:12:29 +0100286{
287 struct perf_counter_context *ctx = counter->ctx;
288 struct task_struct *task = ctx->task;
289
Paul Mackerras564c2b22009-05-22 14:27:22 +1000290 unclone_ctx(ctx);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100291 if (!task) {
292 /*
293 * Per cpu counters are removed via an smp call and
 294 * the removal is always successful.
295 */
296 smp_call_function_single(counter->cpu,
Ingo Molnar04289bb2008-12-11 08:38:42 +0100297 __perf_counter_remove_from_context,
Thomas Gleixner0793a612008-12-04 20:12:29 +0100298 counter, 1);
299 return;
300 }
301
302retry:
Ingo Molnar04289bb2008-12-11 08:38:42 +0100303 task_oncpu_function_call(task, __perf_counter_remove_from_context,
Thomas Gleixner0793a612008-12-04 20:12:29 +0100304 counter);
305
306 spin_lock_irq(&ctx->lock);
307 /*
308 * If the context is active we need to retry the smp call.
309 */
Ingo Molnar04289bb2008-12-11 08:38:42 +0100310 if (ctx->nr_active && !list_empty(&counter->list_entry)) {
Thomas Gleixner0793a612008-12-04 20:12:29 +0100311 spin_unlock_irq(&ctx->lock);
312 goto retry;
313 }
314
315 /*
 316 * The lock prevents this context from being scheduled in, so we
Ingo Molnar04289bb2008-12-11 08:38:42 +0100317 * can remove the counter safely, if the call above did not
Thomas Gleixner0793a612008-12-04 20:12:29 +0100318 * succeed.
319 */
Ingo Molnar04289bb2008-12-11 08:38:42 +0100320 if (!list_empty(&counter->list_entry)) {
Ingo Molnar04289bb2008-12-11 08:38:42 +0100321 list_del_counter(counter, ctx);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100322 }
323 spin_unlock_irq(&ctx->lock);
324}
325
Peter Zijlstra4af49982009-04-06 11:45:10 +0200326static inline u64 perf_clock(void)
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100327{
Peter Zijlstra4af49982009-04-06 11:45:10 +0200328 return cpu_clock(smp_processor_id());
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100329}
330
331/*
332 * Update the record of the current time in a context.
333 */
Peter Zijlstra4af49982009-04-06 11:45:10 +0200334static void update_context_time(struct perf_counter_context *ctx)
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100335{
Peter Zijlstra4af49982009-04-06 11:45:10 +0200336 u64 now = perf_clock();
337
338 ctx->time += now - ctx->timestamp;
339 ctx->timestamp = now;
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100340}
341
342/*
343 * Update the total_time_enabled and total_time_running fields for a counter.
344 */
345static void update_counter_times(struct perf_counter *counter)
346{
347 struct perf_counter_context *ctx = counter->ctx;
348 u64 run_end;
349
Peter Zijlstra4af49982009-04-06 11:45:10 +0200350 if (counter->state < PERF_COUNTER_STATE_INACTIVE)
351 return;
352
353 counter->total_time_enabled = ctx->time - counter->tstamp_enabled;
354
355 if (counter->state == PERF_COUNTER_STATE_INACTIVE)
356 run_end = counter->tstamp_stopped;
357 else
358 run_end = ctx->time;
359
360 counter->total_time_running = run_end - counter->tstamp_running;
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100361}
362
363/*
364 * Update total_time_enabled and total_time_running for all counters in a group.
365 */
366static void update_group_times(struct perf_counter *leader)
367{
368 struct perf_counter *counter;
369
370 update_counter_times(leader);
371 list_for_each_entry(counter, &leader->sibling_list, list_entry)
372 update_counter_times(counter);
373}
374
375/*
Paul Mackerrasd859e292009-01-17 18:10:22 +1100376 * Cross CPU call to disable a performance counter
377 */
378static void __perf_counter_disable(void *info)
379{
380 struct perf_counter *counter = info;
381 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
382 struct perf_counter_context *ctx = counter->ctx;
383 unsigned long flags;
384
385 /*
386 * If this is a per-task counter, need to check whether this
387 * counter's task is the current task on this cpu.
388 */
389 if (ctx->task && cpuctx->task_ctx != ctx)
390 return;
391
Peter Zijlstra849691a2009-04-06 11:45:12 +0200392 spin_lock_irqsave(&ctx->lock, flags);
Paul Mackerrasd859e292009-01-17 18:10:22 +1100393
394 /*
395 * If the counter is on, turn it off.
396 * If it is in error state, leave it in error state.
397 */
398 if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
Peter Zijlstra4af49982009-04-06 11:45:10 +0200399 update_context_time(ctx);
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100400 update_counter_times(counter);
Paul Mackerrasd859e292009-01-17 18:10:22 +1100401 if (counter == counter->group_leader)
402 group_sched_out(counter, cpuctx, ctx);
403 else
404 counter_sched_out(counter, cpuctx, ctx);
405 counter->state = PERF_COUNTER_STATE_OFF;
406 }
407
Peter Zijlstra849691a2009-04-06 11:45:12 +0200408 spin_unlock_irqrestore(&ctx->lock, flags);
Paul Mackerrasd859e292009-01-17 18:10:22 +1100409}
410
411/*
412 * Disable a counter.
413 */
414static void perf_counter_disable(struct perf_counter *counter)
415{
416 struct perf_counter_context *ctx = counter->ctx;
417 struct task_struct *task = ctx->task;
418
419 if (!task) {
420 /*
421 * Disable the counter on the cpu that it's on
422 */
423 smp_call_function_single(counter->cpu, __perf_counter_disable,
424 counter, 1);
425 return;
426 }
427
428 retry:
429 task_oncpu_function_call(task, __perf_counter_disable, counter);
430
431 spin_lock_irq(&ctx->lock);
432 /*
433 * If the counter is still active, we need to retry the cross-call.
434 */
435 if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
436 spin_unlock_irq(&ctx->lock);
437 goto retry;
438 }
439
440 /*
441 * Since we have the lock this context can't be scheduled
442 * in, so we can change the state safely.
443 */
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100444 if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
445 update_counter_times(counter);
Paul Mackerrasd859e292009-01-17 18:10:22 +1100446 counter->state = PERF_COUNTER_STATE_OFF;
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100447 }
Paul Mackerrasd859e292009-01-17 18:10:22 +1100448
449 spin_unlock_irq(&ctx->lock);
450}
451
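/*
 * Put a single counter on the PMU: mark it ACTIVE, set ->oncpu and
 * call the pmu enable method. If the hardware refuses, roll the
 * state back to INACTIVE and return -EAGAIN so the caller can undo
 * any partially scheduled group.
 */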
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100452static int
453counter_sched_in(struct perf_counter *counter,
454 struct perf_cpu_context *cpuctx,
455 struct perf_counter_context *ctx,
456 int cpu)
457{
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100458 if (counter->state <= PERF_COUNTER_STATE_OFF)
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100459 return 0;
460
461 counter->state = PERF_COUNTER_STATE_ACTIVE;
462 counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */
463 /*
464 * The new state must be visible before we turn it on in the hardware:
465 */
466 smp_wmb();
467
Robert Richter4aeb0b42009-04-29 12:47:03 +0200468 if (counter->pmu->enable(counter)) {
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100469 counter->state = PERF_COUNTER_STATE_INACTIVE;
470 counter->oncpu = -1;
471 return -EAGAIN;
472 }
473
Peter Zijlstra4af49982009-04-06 11:45:10 +0200474 counter->tstamp_running += ctx->time - counter->tstamp_stopped;
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100475
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100476 if (!is_software_counter(counter))
477 cpuctx->active_oncpu++;
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100478 ctx->nr_active++;
479
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100480 if (counter->hw_event.exclusive)
481 cpuctx->exclusive = 1;
482
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100483 return 0;
484}
485
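/*
 * Schedule in a counter group as a single unit: give the
 * architecture code a chance to do it wholesale via
 * hw_perf_group_sched_in(), otherwise add the leader and then each
 * sibling, unwinding the whole group if any member fails.
 */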
Paul Mackerras6751b712009-05-11 12:08:02 +1000486static int
487group_sched_in(struct perf_counter *group_counter,
488 struct perf_cpu_context *cpuctx,
489 struct perf_counter_context *ctx,
490 int cpu)
491{
492 struct perf_counter *counter, *partial_group;
493 int ret;
494
495 if (group_counter->state == PERF_COUNTER_STATE_OFF)
496 return 0;
497
498 ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu);
499 if (ret)
500 return ret < 0 ? ret : 0;
501
502 group_counter->prev_state = group_counter->state;
503 if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
504 return -EAGAIN;
505
506 /*
507 * Schedule in siblings as one group (if any):
508 */
509 list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
510 counter->prev_state = counter->state;
511 if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
512 partial_group = counter;
513 goto group_error;
514 }
515 }
516
517 return 0;
518
519group_error:
520 /*
521 * Groups can be scheduled in as one unit only, so undo any
522 * partial group before returning:
523 */
524 list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
525 if (counter == partial_group)
526 break;
527 counter_sched_out(counter, cpuctx, ctx);
528 }
529 counter_sched_out(group_counter, cpuctx, ctx);
530
531 return -EAGAIN;
532}
533
Thomas Gleixner0793a612008-12-04 20:12:29 +0100534/*
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100535 * Return 1 for a group consisting entirely of software counters,
536 * 0 if the group contains any hardware counters.
537 */
538static int is_software_only_group(struct perf_counter *leader)
539{
540 struct perf_counter *counter;
541
542 if (!is_software_counter(leader))
543 return 0;
Peter Zijlstra5c148192009-03-25 12:30:23 +0100544
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100545 list_for_each_entry(counter, &leader->sibling_list, list_entry)
546 if (!is_software_counter(counter))
547 return 0;
Peter Zijlstra5c148192009-03-25 12:30:23 +0100548
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100549 return 1;
550}
551
552/*
553 * Work out whether we can put this counter group on the CPU now.
554 */
555static int group_can_go_on(struct perf_counter *counter,
556 struct perf_cpu_context *cpuctx,
557 int can_add_hw)
558{
559 /*
560 * Groups consisting entirely of software counters can always go on.
561 */
562 if (is_software_only_group(counter))
563 return 1;
564 /*
565 * If an exclusive group is already on, no other hardware
566 * counters can go on.
567 */
568 if (cpuctx->exclusive)
569 return 0;
570 /*
571 * If this group is exclusive and there are already
572 * counters on the CPU, it can't go on.
573 */
574 if (counter->hw_event.exclusive && cpuctx->active_oncpu)
575 return 0;
576 /*
577 * Otherwise, try to add it if all previous groups were able
578 * to go on.
579 */
580 return can_add_hw;
581}
582
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100583static void add_counter_to_ctx(struct perf_counter *counter,
584 struct perf_counter_context *ctx)
585{
586 list_add_counter(counter, ctx);
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100587 counter->prev_state = PERF_COUNTER_STATE_OFF;
Peter Zijlstra4af49982009-04-06 11:45:10 +0200588 counter->tstamp_enabled = ctx->time;
589 counter->tstamp_running = ctx->time;
590 counter->tstamp_stopped = ctx->time;
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100591}
592
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100593/*
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100594 * Cross CPU call to install and enable a performance counter
Peter Zijlstra682076a2009-05-23 18:28:57 +0200595 *
596 * Must be called with ctx->mutex held
Thomas Gleixner0793a612008-12-04 20:12:29 +0100597 */
598static void __perf_install_in_context(void *info)
599{
600 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
601 struct perf_counter *counter = info;
602 struct perf_counter_context *ctx = counter->ctx;
Paul Mackerrasd859e292009-01-17 18:10:22 +1100603 struct perf_counter *leader = counter->group_leader;
Thomas Gleixner0793a612008-12-04 20:12:29 +0100604 int cpu = smp_processor_id();
Ingo Molnar9b51f662008-12-12 13:49:45 +0100605 unsigned long flags;
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100606 int err;
Thomas Gleixner0793a612008-12-04 20:12:29 +0100607
608 /*
609 * If this is a task context, we need to check whether it is
610 * the current task context of this cpu. If not it has been
611 * scheduled out before the smp call arrived.
Paul Mackerrasa63eaf32009-05-22 14:17:31 +1000612 * Or possibly this is the right context but it isn't
613 * on this cpu because it had no counters.
Thomas Gleixner0793a612008-12-04 20:12:29 +0100614 */
Paul Mackerrasa63eaf32009-05-22 14:17:31 +1000615 if (ctx->task && cpuctx->task_ctx != ctx) {
616 if (cpuctx->task_ctx || ctx->task != current)
617 return;
618 cpuctx->task_ctx = ctx;
619 }
Thomas Gleixner0793a612008-12-04 20:12:29 +0100620
Peter Zijlstra849691a2009-04-06 11:45:12 +0200621 spin_lock_irqsave(&ctx->lock, flags);
Paul Mackerrasa63eaf32009-05-22 14:17:31 +1000622 ctx->is_active = 1;
Peter Zijlstra4af49982009-04-06 11:45:10 +0200623 update_context_time(ctx);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100624
625 /*
626 * Protect the list operation against NMI by disabling the
627 * counters on a global level. NOP for non NMI based counters.
628 */
Peter Zijlstra9e35ad32009-05-13 16:21:38 +0200629 perf_disable();
Thomas Gleixner0793a612008-12-04 20:12:29 +0100630
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100631 add_counter_to_ctx(counter, ctx);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100632
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100633 /*
Paul Mackerrasd859e292009-01-17 18:10:22 +1100634 * Don't put the counter on if it is disabled or if
635 * it is in a group and the group isn't on.
636 */
637 if (counter->state != PERF_COUNTER_STATE_INACTIVE ||
638 (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE))
639 goto unlock;
640
641 /*
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100642 * An exclusive counter can't go on if there are already active
643 * hardware counters, and no hardware counter can go on if there
644 * is already an exclusive counter on.
645 */
Paul Mackerrasd859e292009-01-17 18:10:22 +1100646 if (!group_can_go_on(counter, cpuctx, 1))
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100647 err = -EEXIST;
648 else
649 err = counter_sched_in(counter, cpuctx, ctx, cpu);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100650
Paul Mackerrasd859e292009-01-17 18:10:22 +1100651 if (err) {
652 /*
653 * This counter couldn't go on. If it is in a group
654 * then we have to pull the whole group off.
655 * If the counter group is pinned then put it in error state.
656 */
657 if (leader != counter)
658 group_sched_out(leader, cpuctx, ctx);
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100659 if (leader->hw_event.pinned) {
660 update_group_times(leader);
Paul Mackerrasd859e292009-01-17 18:10:22 +1100661 leader->state = PERF_COUNTER_STATE_ERROR;
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100662 }
Paul Mackerrasd859e292009-01-17 18:10:22 +1100663 }
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100664
665 if (!err && !ctx->task && cpuctx->max_pertask)
Thomas Gleixner0793a612008-12-04 20:12:29 +0100666 cpuctx->max_pertask--;
667
Paul Mackerrasd859e292009-01-17 18:10:22 +1100668 unlock:
Peter Zijlstra9e35ad32009-05-13 16:21:38 +0200669 perf_enable();
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100670
Peter Zijlstra849691a2009-04-06 11:45:12 +0200671 spin_unlock_irqrestore(&ctx->lock, flags);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100672}
673
674/*
675 * Attach a performance counter to a context
676 *
677 * First we add the counter to the list with the hardware enable bit
678 * in counter->hw_config cleared.
679 *
680 * If the counter is attached to a task which is on a CPU we use a smp
681 * call to enable it in the task context. The task might have been
682 * scheduled away, but we check this in the smp call again.
Paul Mackerrasd859e292009-01-17 18:10:22 +1100683 *
684 * Must be called with ctx->mutex held.
Thomas Gleixner0793a612008-12-04 20:12:29 +0100685 */
686static void
687perf_install_in_context(struct perf_counter_context *ctx,
688 struct perf_counter *counter,
689 int cpu)
690{
691 struct task_struct *task = ctx->task;
692
Thomas Gleixner0793a612008-12-04 20:12:29 +0100693 if (!task) {
694 /*
695 * Per cpu counters are installed via an smp call and
 696 * the install is always successful.
697 */
698 smp_call_function_single(cpu, __perf_install_in_context,
699 counter, 1);
700 return;
701 }
702
Thomas Gleixner0793a612008-12-04 20:12:29 +0100703retry:
704 task_oncpu_function_call(task, __perf_install_in_context,
705 counter);
706
707 spin_lock_irq(&ctx->lock);
708 /*
Thomas Gleixner0793a612008-12-04 20:12:29 +0100709 * If the context is active we need to retry the smp call.
710 */
Paul Mackerrasd859e292009-01-17 18:10:22 +1100711 if (ctx->is_active && list_empty(&counter->list_entry)) {
Thomas Gleixner0793a612008-12-04 20:12:29 +0100712 spin_unlock_irq(&ctx->lock);
713 goto retry;
714 }
715
716 /*
 717 * The lock prevents this context from being scheduled in, so we
Ingo Molnar04289bb2008-12-11 08:38:42 +0100718 * can add the counter safely, if the call above did not
719 * succeed.
720 */
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100721 if (list_empty(&counter->list_entry))
722 add_counter_to_ctx(counter, ctx);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100723 spin_unlock_irq(&ctx->lock);
724}
725
Paul Mackerrasd859e292009-01-17 18:10:22 +1100726/*
727 * Cross CPU call to enable a performance counter
728 */
729static void __perf_counter_enable(void *info)
Ingo Molnar04289bb2008-12-11 08:38:42 +0100730{
Paul Mackerrasd859e292009-01-17 18:10:22 +1100731 struct perf_counter *counter = info;
732 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
733 struct perf_counter_context *ctx = counter->ctx;
734 struct perf_counter *leader = counter->group_leader;
735 unsigned long flags;
736 int err;
Ingo Molnar04289bb2008-12-11 08:38:42 +0100737
738 /*
Paul Mackerrasd859e292009-01-17 18:10:22 +1100739 * If this is a per-task counter, need to check whether this
740 * counter's task is the current task on this cpu.
Ingo Molnar04289bb2008-12-11 08:38:42 +0100741 */
Paul Mackerrasa63eaf32009-05-22 14:17:31 +1000742 if (ctx->task && cpuctx->task_ctx != ctx) {
743 if (cpuctx->task_ctx || ctx->task != current)
744 return;
745 cpuctx->task_ctx = ctx;
746 }
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100747
Peter Zijlstra849691a2009-04-06 11:45:12 +0200748 spin_lock_irqsave(&ctx->lock, flags);
Paul Mackerrasa63eaf32009-05-22 14:17:31 +1000749 ctx->is_active = 1;
Peter Zijlstra4af49982009-04-06 11:45:10 +0200750 update_context_time(ctx);
Paul Mackerrasd859e292009-01-17 18:10:22 +1100751
Paul Mackerrasc07c99b2009-02-13 22:10:34 +1100752 counter->prev_state = counter->state;
Paul Mackerrasd859e292009-01-17 18:10:22 +1100753 if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
754 goto unlock;
755 counter->state = PERF_COUNTER_STATE_INACTIVE;
Peter Zijlstra4af49982009-04-06 11:45:10 +0200756 counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
Paul Mackerrasd859e292009-01-17 18:10:22 +1100757
758 /*
759 * If the counter is in a group and isn't the group leader,
760 * then don't put it on unless the group is on.
761 */
762 if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)
763 goto unlock;
764
Paul Mackerrase758a332009-05-12 21:59:01 +1000765 if (!group_can_go_on(counter, cpuctx, 1)) {
Paul Mackerrasd859e292009-01-17 18:10:22 +1100766 err = -EEXIST;
Paul Mackerrase758a332009-05-12 21:59:01 +1000767 } else {
Peter Zijlstra9e35ad32009-05-13 16:21:38 +0200768 perf_disable();
Paul Mackerrase758a332009-05-12 21:59:01 +1000769 if (counter == leader)
770 err = group_sched_in(counter, cpuctx, ctx,
771 smp_processor_id());
772 else
773 err = counter_sched_in(counter, cpuctx, ctx,
774 smp_processor_id());
Peter Zijlstra9e35ad32009-05-13 16:21:38 +0200775 perf_enable();
Paul Mackerrase758a332009-05-12 21:59:01 +1000776 }
Paul Mackerrasd859e292009-01-17 18:10:22 +1100777
778 if (err) {
779 /*
780 * If this counter can't go on and it's part of a
781 * group, then the whole group has to come off.
782 */
783 if (leader != counter)
784 group_sched_out(leader, cpuctx, ctx);
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100785 if (leader->hw_event.pinned) {
786 update_group_times(leader);
Paul Mackerrasd859e292009-01-17 18:10:22 +1100787 leader->state = PERF_COUNTER_STATE_ERROR;
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100788 }
Paul Mackerrasd859e292009-01-17 18:10:22 +1100789 }
790
791 unlock:
Peter Zijlstra849691a2009-04-06 11:45:12 +0200792 spin_unlock_irqrestore(&ctx->lock, flags);
Paul Mackerrasd859e292009-01-17 18:10:22 +1100793}
794
795/*
796 * Enable a counter.
797 */
798static void perf_counter_enable(struct perf_counter *counter)
799{
800 struct perf_counter_context *ctx = counter->ctx;
801 struct task_struct *task = ctx->task;
802
803 if (!task) {
804 /*
805 * Enable the counter on the cpu that it's on
806 */
807 smp_call_function_single(counter->cpu, __perf_counter_enable,
808 counter, 1);
809 return;
810 }
811
812 spin_lock_irq(&ctx->lock);
813 if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
814 goto out;
815
816 /*
817 * If the counter is in error state, clear that first.
818 * That way, if we see the counter in error state below, we
819 * know that it has gone back into error state, as distinct
820 * from the task having been scheduled away before the
821 * cross-call arrived.
822 */
823 if (counter->state == PERF_COUNTER_STATE_ERROR)
824 counter->state = PERF_COUNTER_STATE_OFF;
825
826 retry:
827 spin_unlock_irq(&ctx->lock);
828 task_oncpu_function_call(task, __perf_counter_enable, counter);
829
830 spin_lock_irq(&ctx->lock);
831
832 /*
833 * If the context is active and the counter is still off,
834 * we need to retry the cross-call.
835 */
836 if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF)
837 goto retry;
838
839 /*
840 * Since we have the lock this context can't be scheduled
841 * in, so we can change the state safely.
842 */
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100843 if (counter->state == PERF_COUNTER_STATE_OFF) {
Paul Mackerrasd859e292009-01-17 18:10:22 +1100844 counter->state = PERF_COUNTER_STATE_INACTIVE;
Peter Zijlstra4af49982009-04-06 11:45:10 +0200845 counter->tstamp_enabled =
846 ctx->time - counter->total_time_enabled;
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100847 }
Paul Mackerrasd859e292009-01-17 18:10:22 +1100848 out:
849 spin_unlock_irq(&ctx->lock);
850}
851
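/*
 * Add 'refresh' to the counter's event_limit and re-enable it;
 * not supported on inherited counters.
 */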
Peter Zijlstra2023b352009-05-05 17:50:26 +0200852static int perf_counter_refresh(struct perf_counter *counter, int refresh)
Peter Zijlstra79f14642009-04-06 11:45:07 +0200853{
Peter Zijlstra2023b352009-05-05 17:50:26 +0200854 /*
855 * not supported on inherited counters
856 */
857 if (counter->hw_event.inherit)
858 return -EINVAL;
859
Peter Zijlstra79f14642009-04-06 11:45:07 +0200860 atomic_add(refresh, &counter->event_limit);
861 perf_counter_enable(counter);
Peter Zijlstra2023b352009-05-05 17:50:26 +0200862
863 return 0;
Peter Zijlstra79f14642009-04-06 11:45:07 +0200864}
865
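/*
 * Schedule out every active counter (and group) in a context, with
 * ctx->lock held and the PMU disabled around the list walk.
 */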
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100866void __perf_counter_sched_out(struct perf_counter_context *ctx,
867 struct perf_cpu_context *cpuctx)
868{
869 struct perf_counter *counter;
870
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100871 spin_lock(&ctx->lock);
Paul Mackerrasd859e292009-01-17 18:10:22 +1100872 ctx->is_active = 0;
873 if (likely(!ctx->nr_counters))
874 goto out;
Peter Zijlstra4af49982009-04-06 11:45:10 +0200875 update_context_time(ctx);
Paul Mackerrasd859e292009-01-17 18:10:22 +1100876
Peter Zijlstra9e35ad32009-05-13 16:21:38 +0200877 perf_disable();
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100878 if (ctx->nr_active) {
Peter Zijlstraafedadf2009-05-20 12:21:22 +0200879 list_for_each_entry(counter, &ctx->counter_list, list_entry) {
880 if (counter != counter->group_leader)
881 counter_sched_out(counter, cpuctx, ctx);
882 else
883 group_sched_out(counter, cpuctx, ctx);
884 }
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100885 }
Peter Zijlstra9e35ad32009-05-13 16:21:38 +0200886 perf_enable();
Paul Mackerrasd859e292009-01-17 18:10:22 +1100887 out:
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100888 spin_unlock(&ctx->lock);
889}
890
Thomas Gleixner0793a612008-12-04 20:12:29 +0100891/*
Paul Mackerras564c2b22009-05-22 14:27:22 +1000892 * Test whether two contexts are equivalent, i.e. whether they
893 * have both been cloned from the same version of the same context
894 * and they both have the same number of enabled counters.
895 * If the number of enabled counters is the same, then the set
896 * of enabled counters should be the same, because these are both
897 * inherited contexts, therefore we can't access individual counters
898 * in them directly with an fd; we can only enable/disable all
899 * counters via prctl, or enable/disable all counters in a family
900 * via ioctl, which will have the same effect on both contexts.
901 */
902static int context_equiv(struct perf_counter_context *ctx1,
903 struct perf_counter_context *ctx2)
904{
905 return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
Peter Zijlstra475c5572009-05-23 18:29:01 +0200906 && ctx1->parent_gen == ctx2->parent_gen;
Paul Mackerras564c2b22009-05-22 14:27:22 +1000907}
908
909/*
Thomas Gleixner0793a612008-12-04 20:12:29 +0100910 * Called from scheduler to remove the counters of the current task,
911 * with interrupts disabled.
912 *
913 * We stop each counter and update the counter value in counter->count.
914 *
Ingo Molnar76715812008-12-17 14:20:28 +0100915 * This does not protect us against NMI, but disable()
Thomas Gleixner0793a612008-12-04 20:12:29 +0100916 * sets the disabled bit in the control field of counter _before_
 917 * accessing the counter control register. If an NMI hits, then it will
918 * not restart the counter.
919 */
Paul Mackerras564c2b22009-05-22 14:27:22 +1000920void perf_counter_task_sched_out(struct task_struct *task,
921 struct task_struct *next, int cpu)
Thomas Gleixner0793a612008-12-04 20:12:29 +0100922{
923 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
Paul Mackerrasa63eaf32009-05-22 14:17:31 +1000924 struct perf_counter_context *ctx = task->perf_counter_ctxp;
Paul Mackerras564c2b22009-05-22 14:27:22 +1000925 struct perf_counter_context *next_ctx;
Peter Zijlstra4a0deca2009-03-19 20:26:12 +0100926 struct pt_regs *regs;
Thomas Gleixner0793a612008-12-04 20:12:29 +0100927
Peter Zijlstra10989fb2009-05-25 14:45:28 +0200928 regs = task_pt_regs(task);
929 perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
930
Paul Mackerrasa63eaf32009-05-22 14:17:31 +1000931 if (likely(!ctx || !cpuctx->task_ctx))
Thomas Gleixner0793a612008-12-04 20:12:29 +0100932 return;
933
Peter Zijlstrabce379b2009-04-06 11:45:13 +0200934 update_context_time(ctx);
Paul Mackerras564c2b22009-05-22 14:27:22 +1000935 next_ctx = next->perf_counter_ctxp;
936 if (next_ctx && context_equiv(ctx, next_ctx)) {
937 task->perf_counter_ctxp = next_ctx;
938 next->perf_counter_ctxp = ctx;
939 ctx->task = next;
940 next_ctx->task = task;
941 return;
942 }
943
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100944 __perf_counter_sched_out(ctx, cpuctx);
945
Thomas Gleixner0793a612008-12-04 20:12:29 +0100946 cpuctx->task_ctx = NULL;
947}
948
Paul Mackerrasa08b1592009-05-11 15:46:10 +1000949static void __perf_counter_task_sched_out(struct perf_counter_context *ctx)
950{
951 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
952
Paul Mackerrasa63eaf32009-05-22 14:17:31 +1000953 if (!cpuctx->task_ctx)
954 return;
Paul Mackerrasa08b1592009-05-11 15:46:10 +1000955 __perf_counter_sched_out(ctx, cpuctx);
956 cpuctx->task_ctx = NULL;
957}
958
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100959static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
Ingo Molnar04289bb2008-12-11 08:38:42 +0100960{
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100961 __perf_counter_sched_out(&cpuctx->ctx, cpuctx);
Ingo Molnar04289bb2008-12-11 08:38:42 +0100962}
963
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100964static void
965__perf_counter_sched_in(struct perf_counter_context *ctx,
966 struct perf_cpu_context *cpuctx, int cpu)
Thomas Gleixner0793a612008-12-04 20:12:29 +0100967{
Thomas Gleixner0793a612008-12-04 20:12:29 +0100968 struct perf_counter *counter;
Paul Mackerrasdd0e6ba2009-01-12 15:11:00 +1100969 int can_add_hw = 1;
Thomas Gleixner0793a612008-12-04 20:12:29 +0100970
Thomas Gleixner0793a612008-12-04 20:12:29 +0100971 spin_lock(&ctx->lock);
Paul Mackerrasd859e292009-01-17 18:10:22 +1100972 ctx->is_active = 1;
973 if (likely(!ctx->nr_counters))
974 goto out;
975
Peter Zijlstra4af49982009-04-06 11:45:10 +0200976 ctx->timestamp = perf_clock();
Paul Mackerras53cfbf52009-03-25 22:46:58 +1100977
Peter Zijlstra9e35ad32009-05-13 16:21:38 +0200978 perf_disable();
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100979
980 /*
981 * First go through the list and put on any pinned groups
982 * in order to give them the best chance of going on.
983 */
Ingo Molnar04289bb2008-12-11 08:38:42 +0100984 list_for_each_entry(counter, &ctx->counter_list, list_entry) {
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100985 if (counter->state <= PERF_COUNTER_STATE_OFF ||
986 !counter->hw_event.pinned)
987 continue;
988 if (counter->cpu != -1 && counter->cpu != cpu)
989 continue;
990
Peter Zijlstraafedadf2009-05-20 12:21:22 +0200991 if (counter != counter->group_leader)
992 counter_sched_in(counter, cpuctx, ctx, cpu);
993 else {
994 if (group_can_go_on(counter, cpuctx, 1))
995 group_sched_in(counter, cpuctx, ctx, cpu);
996 }
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100997
998 /*
999 * If this pinned group hasn't been scheduled,
1000 * put it in error state.
1001 */
Paul Mackerras53cfbf52009-03-25 22:46:58 +11001002 if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
1003 update_group_times(counter);
Paul Mackerras3b6f9e52009-01-14 21:00:30 +11001004 counter->state = PERF_COUNTER_STATE_ERROR;
Paul Mackerras53cfbf52009-03-25 22:46:58 +11001005 }
Paul Mackerras3b6f9e52009-01-14 21:00:30 +11001006 }
1007
1008 list_for_each_entry(counter, &ctx->counter_list, list_entry) {
1009 /*
1010 * Ignore counters in OFF or ERROR state, and
1011 * ignore pinned counters since we did them already.
1012 */
1013 if (counter->state <= PERF_COUNTER_STATE_OFF ||
1014 counter->hw_event.pinned)
1015 continue;
1016
Ingo Molnar04289bb2008-12-11 08:38:42 +01001017 /*
1018 * Listen to the 'cpu' scheduling filter constraint
1019 * of counters:
1020 */
Thomas Gleixner0793a612008-12-04 20:12:29 +01001021 if (counter->cpu != -1 && counter->cpu != cpu)
1022 continue;
1023
Peter Zijlstraafedadf2009-05-20 12:21:22 +02001024 if (counter != counter->group_leader) {
1025 if (counter_sched_in(counter, cpuctx, ctx, cpu))
Paul Mackerrasdd0e6ba2009-01-12 15:11:00 +11001026 can_add_hw = 0;
Peter Zijlstraafedadf2009-05-20 12:21:22 +02001027 } else {
1028 if (group_can_go_on(counter, cpuctx, can_add_hw)) {
1029 if (group_sched_in(counter, cpuctx, ctx, cpu))
1030 can_add_hw = 0;
1031 }
Paul Mackerras3b6f9e52009-01-14 21:00:30 +11001032 }
Thomas Gleixner0793a612008-12-04 20:12:29 +01001033 }
Peter Zijlstra9e35ad32009-05-13 16:21:38 +02001034 perf_enable();
Paul Mackerrasd859e292009-01-17 18:10:22 +11001035 out:
Thomas Gleixner0793a612008-12-04 20:12:29 +01001036 spin_unlock(&ctx->lock);
Ingo Molnar235c7fc2008-12-21 14:43:25 +01001037}
Ingo Molnar04289bb2008-12-11 08:38:42 +01001038
Ingo Molnar235c7fc2008-12-21 14:43:25 +01001039/*
1040 * Called from scheduler to add the counters of the current task
1041 * with interrupts disabled.
1042 *
1043 * We restore the counter value and then enable it.
1044 *
1045 * This does not protect us against NMI, but enable()
1046 * sets the enabled bit in the control field of counter _before_
 1047 * accessing the counter control register. If an NMI hits, then it will
1048 * keep the counter running.
1049 */
1050void perf_counter_task_sched_in(struct task_struct *task, int cpu)
1051{
1052 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10001053 struct perf_counter_context *ctx = task->perf_counter_ctxp;
Ingo Molnar235c7fc2008-12-21 14:43:25 +01001054
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10001055 if (likely(!ctx))
1056 return;
Paul Mackerras564c2b22009-05-22 14:27:22 +10001057 if (cpuctx->task_ctx == ctx)
1058 return;
Ingo Molnar235c7fc2008-12-21 14:43:25 +01001059 __perf_counter_sched_in(ctx, cpuctx, cpu);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001060 cpuctx->task_ctx = ctx;
1061}
1062
Ingo Molnar235c7fc2008-12-21 14:43:25 +01001063static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
1064{
1065 struct perf_counter_context *ctx = &cpuctx->ctx;
1066
1067 __perf_counter_sched_in(ctx, cpuctx, cpu);
1068}
1069
Peter Zijlstraa78ac322009-05-25 17:39:05 +02001070#define MAX_INTERRUPTS (~0ULL)
1071
1072static void perf_log_throttle(struct perf_counter *counter, int enable);
Peter Zijlstra26b119b2009-05-20 12:21:20 +02001073static void perf_log_period(struct perf_counter *counter, u64 period);
1074
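/*
 * Re-tune the irq_period of frequency-based counters once per tick:
 * unthrottle counters that hit MAX_INTERRUPTS, estimate the event
 * rate from last tick's interrupt count, and nudge irq_period
 * halfway toward the period that would yield the requested
 * irq_freq. For example (illustrative numbers only): with HZ=1000,
 * irq_freq=1000, irq_period=100000 and 2 interrupts last tick,
 * events = 1000*2*100000, the target period is 200000, and
 * irq_period becomes 150000.
 */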
1075static void perf_adjust_freq(struct perf_counter_context *ctx)
Peter Zijlstra60db5e02009-05-15 15:19:28 +02001076{
1077 struct perf_counter *counter;
Peter Zijlstraa78ac322009-05-25 17:39:05 +02001078 u64 interrupts, irq_period;
Peter Zijlstra60db5e02009-05-15 15:19:28 +02001079 u64 events, period;
1080 s64 delta;
1081
1082 spin_lock(&ctx->lock);
1083 list_for_each_entry(counter, &ctx->counter_list, list_entry) {
1084 if (counter->state != PERF_COUNTER_STATE_ACTIVE)
1085 continue;
1086
Peter Zijlstraa78ac322009-05-25 17:39:05 +02001087 interrupts = counter->hw.interrupts;
1088 counter->hw.interrupts = 0;
1089
1090 if (interrupts == MAX_INTERRUPTS) {
1091 perf_log_throttle(counter, 1);
1092 counter->pmu->unthrottle(counter);
1093 interrupts = 2*sysctl_perf_counter_limit/HZ;
1094 }
1095
Peter Zijlstra60db5e02009-05-15 15:19:28 +02001096 if (!counter->hw_event.freq || !counter->hw_event.irq_freq)
1097 continue;
1098
Peter Zijlstraa78ac322009-05-25 17:39:05 +02001099 events = HZ * interrupts * counter->hw.irq_period;
Peter Zijlstra60db5e02009-05-15 15:19:28 +02001100 period = div64_u64(events, counter->hw_event.irq_freq);
1101
1102 delta = (s64)(1 + period - counter->hw.irq_period);
1103 delta >>= 1;
1104
1105 irq_period = counter->hw.irq_period + delta;
1106
1107 if (!irq_period)
1108 irq_period = 1;
1109
Peter Zijlstra26b119b2009-05-20 12:21:20 +02001110 perf_log_period(counter, irq_period);
1111
Peter Zijlstra60db5e02009-05-15 15:19:28 +02001112 counter->hw.irq_period = irq_period;
Peter Zijlstra60db5e02009-05-15 15:19:28 +02001113 }
1114 spin_unlock(&ctx->lock);
1115}
1116
Ingo Molnar235c7fc2008-12-21 14:43:25 +01001117/*
1118 * Round-robin a context's counters:
1119 */
1120static void rotate_ctx(struct perf_counter_context *ctx)
Thomas Gleixner0793a612008-12-04 20:12:29 +01001121{
Thomas Gleixner0793a612008-12-04 20:12:29 +01001122 struct perf_counter *counter;
1123
Ingo Molnar235c7fc2008-12-21 14:43:25 +01001124 if (!ctx->nr_counters)
Thomas Gleixner0793a612008-12-04 20:12:29 +01001125 return;
1126
Thomas Gleixner0793a612008-12-04 20:12:29 +01001127 spin_lock(&ctx->lock);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001128 /*
Ingo Molnar04289bb2008-12-11 08:38:42 +01001129 * Rotate the first entry last (works just fine for group counters too):
Thomas Gleixner0793a612008-12-04 20:12:29 +01001130 */
Peter Zijlstra9e35ad32009-05-13 16:21:38 +02001131 perf_disable();
Ingo Molnar04289bb2008-12-11 08:38:42 +01001132 list_for_each_entry(counter, &ctx->counter_list, list_entry) {
Peter Zijlstra75564232009-03-13 12:21:29 +01001133 list_move_tail(&counter->list_entry, &ctx->counter_list);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001134 break;
1135 }
Peter Zijlstra9e35ad32009-05-13 16:21:38 +02001136 perf_enable();
Thomas Gleixner0793a612008-12-04 20:12:29 +01001137
1138 spin_unlock(&ctx->lock);
Ingo Molnar235c7fc2008-12-21 14:43:25 +01001139}
Thomas Gleixner0793a612008-12-04 20:12:29 +01001140
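/*
 * Called from the scheduler tick: adjust the sampling periods,
 * schedule out both the cpu context and the task's context, rotate
 * their counter lists for round-robin, and schedule them back in.
 */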
Ingo Molnar235c7fc2008-12-21 14:43:25 +01001141void perf_counter_task_tick(struct task_struct *curr, int cpu)
1142{
Peter Zijlstra7fc23a52009-05-08 18:52:21 +02001143 struct perf_cpu_context *cpuctx;
1144 struct perf_counter_context *ctx;
1145
1146 if (!atomic_read(&nr_counters))
1147 return;
1148
1149 cpuctx = &per_cpu(perf_cpu_context, cpu);
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10001150 ctx = curr->perf_counter_ctxp;
Ingo Molnar235c7fc2008-12-21 14:43:25 +01001151
Peter Zijlstra60db5e02009-05-15 15:19:28 +02001152 perf_adjust_freq(&cpuctx->ctx);
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10001153 if (ctx)
1154 perf_adjust_freq(ctx);
Peter Zijlstra60db5e02009-05-15 15:19:28 +02001155
Ingo Molnarb82914c2009-05-04 18:54:32 +02001156 perf_counter_cpu_sched_out(cpuctx);
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10001157 if (ctx)
1158 __perf_counter_task_sched_out(ctx);
Ingo Molnar235c7fc2008-12-21 14:43:25 +01001159
Ingo Molnarb82914c2009-05-04 18:54:32 +02001160 rotate_ctx(&cpuctx->ctx);
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10001161 if (ctx)
1162 rotate_ctx(ctx);
Ingo Molnar235c7fc2008-12-21 14:43:25 +01001163
Ingo Molnarb82914c2009-05-04 18:54:32 +02001164 perf_counter_cpu_sched_in(cpuctx, cpu);
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10001165 if (ctx)
1166 perf_counter_task_sched_in(curr, cpu);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001167}
1168
1169/*
Thomas Gleixner0793a612008-12-04 20:12:29 +01001170 * Cross CPU call to read the hardware counter
1171 */
Ingo Molnar76715812008-12-17 14:20:28 +01001172static void __read(void *info)
Thomas Gleixner0793a612008-12-04 20:12:29 +01001173{
Ingo Molnar621a01e2008-12-11 12:46:46 +01001174 struct perf_counter *counter = info;
Paul Mackerras53cfbf52009-03-25 22:46:58 +11001175 struct perf_counter_context *ctx = counter->ctx;
Ingo Molnaraa9c4c02008-12-17 14:10:57 +01001176 unsigned long flags;
Ingo Molnar621a01e2008-12-11 12:46:46 +01001177
Peter Zijlstra849691a2009-04-06 11:45:12 +02001178 local_irq_save(flags);
Paul Mackerras53cfbf52009-03-25 22:46:58 +11001179 if (ctx->is_active)
Peter Zijlstra4af49982009-04-06 11:45:10 +02001180 update_context_time(ctx);
Robert Richter4aeb0b42009-04-29 12:47:03 +02001181 counter->pmu->read(counter);
Paul Mackerras53cfbf52009-03-25 22:46:58 +11001182 update_counter_times(counter);
Peter Zijlstra849691a2009-04-06 11:45:12 +02001183 local_irq_restore(flags);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001184}
1185
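/*
 * Read the current counter value: if the counter is active, ask the
 * CPU it is running on to update counter->count first; if it is
 * inactive, just refresh the accumulated times.
 */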
Ingo Molnar04289bb2008-12-11 08:38:42 +01001186static u64 perf_counter_read(struct perf_counter *counter)
Thomas Gleixner0793a612008-12-04 20:12:29 +01001187{
1188 /*
1189 * If counter is enabled and currently active on a CPU, update the
1190 * value in the counter structure:
1191 */
Ingo Molnar6a930702008-12-11 15:17:03 +01001192 if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
Thomas Gleixner0793a612008-12-04 20:12:29 +01001193 smp_call_function_single(counter->oncpu,
Ingo Molnar76715812008-12-17 14:20:28 +01001194 __read, counter, 1);
Paul Mackerras53cfbf52009-03-25 22:46:58 +11001195 } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
1196 update_counter_times(counter);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001197 }
1198
Ingo Molnaree060942008-12-13 09:00:03 +01001199 return atomic64_read(&counter->count);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001200}
1201
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10001202/*
1203 * Initialize the perf_counter context in a task_struct:
1204 */
1205static void
1206__perf_counter_init_context(struct perf_counter_context *ctx,
1207 struct task_struct *task)
1208{
1209 memset(ctx, 0, sizeof(*ctx));
1210 spin_lock_init(&ctx->lock);
1211 mutex_init(&ctx->mutex);
1212 INIT_LIST_HEAD(&ctx->counter_list);
1213 INIT_LIST_HEAD(&ctx->event_list);
1214 atomic_set(&ctx->refcount, 1);
1215 ctx->task = task;
1216}
1217
Thomas Gleixner0793a612008-12-04 20:12:29 +01001218static void put_context(struct perf_counter_context *ctx)
1219{
1220 if (ctx->task)
1221 put_task_struct(ctx->task);
1222}
1223
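/*
 * Find (or create) the context a new counter should be attached to:
 * the per-cpu context for a cpu counter, otherwise the target task's
 * context, allocating one and installing it with cmpxchg() if the
 * task does not have a context yet.
 */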
1224static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
1225{
1226 struct perf_cpu_context *cpuctx;
1227 struct perf_counter_context *ctx;
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10001228 struct perf_counter_context *tctx;
Thomas Gleixner0793a612008-12-04 20:12:29 +01001229 struct task_struct *task;
1230
1231 /*
1232 * If cpu is not a wildcard then this is a percpu counter:
1233 */
1234 if (cpu != -1) {
1235 /* Must be root to operate on a CPU counter: */
Peter Zijlstra1ccd1542009-04-09 10:53:45 +02001236 if (sysctl_perf_counter_priv && !capable(CAP_SYS_ADMIN))
Thomas Gleixner0793a612008-12-04 20:12:29 +01001237 return ERR_PTR(-EACCES);
1238
1239 if (cpu < 0 || cpu > num_possible_cpus())
1240 return ERR_PTR(-EINVAL);
1241
1242 /*
 1243 * We could be clever and allow attaching a counter to an
1244 * offline CPU and activate it when the CPU comes up, but
1245 * that's for later.
1246 */
1247 if (!cpu_isset(cpu, cpu_online_map))
1248 return ERR_PTR(-ENODEV);
1249
1250 cpuctx = &per_cpu(perf_cpu_context, cpu);
1251 ctx = &cpuctx->ctx;
1252
Thomas Gleixner0793a612008-12-04 20:12:29 +01001253 return ctx;
1254 }
1255
1256 rcu_read_lock();
1257 if (!pid)
1258 task = current;
1259 else
1260 task = find_task_by_vpid(pid);
1261 if (task)
1262 get_task_struct(task);
1263 rcu_read_unlock();
1264
1265 if (!task)
1266 return ERR_PTR(-ESRCH);
1267
Thomas Gleixner0793a612008-12-04 20:12:29 +01001268 /* Reuse ptrace permission checks for now. */
1269 if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10001270 put_task_struct(task);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001271 return ERR_PTR(-EACCES);
1272 }
1273
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10001274 ctx = task->perf_counter_ctxp;
1275 if (!ctx) {
1276 ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
1277 if (!ctx) {
1278 put_task_struct(task);
1279 return ERR_PTR(-ENOMEM);
1280 }
1281 __perf_counter_init_context(ctx, task);
1282 /*
1283 * Make sure other cpus see correct values for *ctx
1284 * once task->perf_counter_ctxp is visible to them.
1285 */
1286 smp_wmb();
1287 tctx = cmpxchg(&task->perf_counter_ctxp, NULL, ctx);
1288 if (tctx) {
1289 /*
1290 * We raced with some other task; use
1291 * the context they set.
1292 */
1293 kfree(ctx);
1294 ctx = tctx;
1295 }
1296 }
1297
Thomas Gleixner0793a612008-12-04 20:12:29 +01001298 return ctx;
1299}
1300
Peter Zijlstra592903c2009-03-13 12:21:36 +01001301static void free_counter_rcu(struct rcu_head *head)
1302{
1303 struct perf_counter *counter;
1304
1305 counter = container_of(head, struct perf_counter, rcu_head);
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10001306 put_ctx(counter->ctx);
Peter Zijlstra592903c2009-03-13 12:21:36 +01001307 kfree(counter);
1308}
1309
Peter Zijlstra925d5192009-03-30 19:07:02 +02001310static void perf_pending_sync(struct perf_counter *counter);
1311
Peter Zijlstraf1600952009-03-19 20:26:16 +01001312static void free_counter(struct perf_counter *counter)
1313{
Peter Zijlstra925d5192009-03-30 19:07:02 +02001314 perf_pending_sync(counter);
1315
Peter Zijlstra7fc23a52009-05-08 18:52:21 +02001316 atomic_dec(&nr_counters);
Peter Zijlstra9ee318a2009-04-09 10:53:44 +02001317 if (counter->hw_event.mmap)
1318 atomic_dec(&nr_mmap_tracking);
1319 if (counter->hw_event.munmap)
1320 atomic_dec(&nr_munmap_tracking);
1321 if (counter->hw_event.comm)
1322 atomic_dec(&nr_comm_tracking);
1323
Peter Zijlstrae077df42009-03-19 20:26:17 +01001324 if (counter->destroy)
1325 counter->destroy(counter);
1326
Peter Zijlstraf1600952009-03-19 20:26:16 +01001327 call_rcu(&counter->rcu_head, free_counter_rcu);
1328}
1329
Thomas Gleixner0793a612008-12-04 20:12:29 +01001330/*
1331 * Called when the last reference to the file is gone.
1332 */
1333static int perf_release(struct inode *inode, struct file *file)
1334{
1335 struct perf_counter *counter = file->private_data;
1336 struct perf_counter_context *ctx = counter->ctx;
1337
1338 file->private_data = NULL;
1339
Paul Mackerrasd859e292009-01-17 18:10:22 +11001340 mutex_lock(&ctx->mutex);
Ingo Molnar04289bb2008-12-11 08:38:42 +01001341 perf_counter_remove_from_context(counter);
Paul Mackerrasd859e292009-01-17 18:10:22 +11001342 mutex_unlock(&ctx->mutex);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001343
Peter Zijlstra082ff5a2009-05-23 18:29:00 +02001344 mutex_lock(&counter->owner->perf_counter_mutex);
1345 list_del_init(&counter->owner_entry);
1346 mutex_unlock(&counter->owner->perf_counter_mutex);
1347 put_task_struct(counter->owner);
1348
Peter Zijlstraf1600952009-03-19 20:26:16 +01001349 free_counter(counter);
Mike Galbraith5af75912009-02-11 10:53:37 +01001350 put_context(ctx);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001351
1352 return 0;
1353}
1354
1355/*
1356 * Read the performance counter - simple non blocking version for now
1357 */
1358static ssize_t
1359perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
1360{
Paul Mackerras53cfbf52009-03-25 22:46:58 +11001361 u64 values[3];
1362 int n;
Thomas Gleixner0793a612008-12-04 20:12:29 +01001363
Paul Mackerras3b6f9e52009-01-14 21:00:30 +11001364 /*
1365 * Return end-of-file for a read on a counter that is in
1366 * error state (i.e. because it was pinned but it couldn't be
1367 * scheduled on to the CPU at some point).
1368 */
1369 if (counter->state == PERF_COUNTER_STATE_ERROR)
1370 return 0;
1371
Peter Zijlstrafccc7142009-05-23 18:28:56 +02001372 mutex_lock(&counter->child_mutex);
Paul Mackerras53cfbf52009-03-25 22:46:58 +11001373 values[0] = perf_counter_read(counter);
1374 n = 1;
1375 if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1376 values[n++] = counter->total_time_enabled +
1377 atomic64_read(&counter->child_total_time_enabled);
1378 if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1379 values[n++] = counter->total_time_running +
1380 atomic64_read(&counter->child_total_time_running);
Peter Zijlstrafccc7142009-05-23 18:28:56 +02001381 mutex_unlock(&counter->child_mutex);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001382
Paul Mackerras53cfbf52009-03-25 22:46:58 +11001383 if (count < n * sizeof(u64))
1384 return -EINVAL;
1385 count = n * sizeof(u64);
1386
1387 if (copy_to_user(buf, values, count))
1388 return -EFAULT;
1389
1390 return count;
Thomas Gleixner0793a612008-12-04 20:12:29 +01001391}
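/*
 * Usage sketch (illustrative only; "counter_fd" stands for a counter file
 * descriptor obtained from user space): with both
 * PERF_FORMAT_TOTAL_TIME_ENABLED and PERF_FORMAT_TOTAL_TIME_RUNNING set in
 * hw_event.read_format, a single read() returns three u64 values in the
 * order assembled above:
 *
 *	u64 buf[3];
 *
 *	if (read(counter_fd, buf, sizeof(buf)) == sizeof(buf)) {
 *		u64 count        = buf[0];
 *		u64 time_enabled = buf[1];
 *		u64 time_running = buf[2];
 *		...
 *	}
 *
 * A counter that was time-multiplexed can then be scaled as
 * count * time_enabled / time_running.
 */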
1392
1393static ssize_t
Thomas Gleixner0793a612008-12-04 20:12:29 +01001394perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
1395{
1396 struct perf_counter *counter = file->private_data;
1397
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001398 return perf_read_hw(counter, buf, count);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001399}
1400
1401static unsigned int perf_poll(struct file *file, poll_table *wait)
1402{
1403 struct perf_counter *counter = file->private_data;
Peter Zijlstrac7138f32009-03-24 13:18:16 +01001404 struct perf_mmap_data *data;
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02001405 unsigned int events = POLLHUP;
Peter Zijlstrac7138f32009-03-24 13:18:16 +01001406
1407 rcu_read_lock();
1408 data = rcu_dereference(counter->data);
1409 if (data)
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02001410 events = atomic_xchg(&data->poll, 0);
Peter Zijlstrac7138f32009-03-24 13:18:16 +01001411 rcu_read_unlock();
Thomas Gleixner0793a612008-12-04 20:12:29 +01001412
1413 poll_wait(file, &counter->waitq, wait);
1414
Thomas Gleixner0793a612008-12-04 20:12:29 +01001415 return events;
1416}
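/*
 * Usage sketch (illustrative only): user space typically waits for newly
 * published ring-buffer data by polling the counter fd:
 *
 *	struct pollfd pfd = { .fd = counter_fd, .events = POLLIN };
 *
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *		... consume data up to user_page->data_head ...
 *
 * The events returned here are whatever perf_output_wakeup() accumulated in
 * data->poll since the previous poll() call.
 */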
1417
Peter Zijlstra6de6a7b2009-05-05 17:50:23 +02001418static void perf_counter_reset(struct perf_counter *counter)
1419{
Peter Zijlstra3df5eda2009-05-08 18:52:22 +02001420 (void)perf_counter_read(counter);
Paul Mackerras615a3f12009-05-11 15:50:21 +10001421 atomic64_set(&counter->count, 0);
Peter Zijlstra3df5eda2009-05-08 18:52:22 +02001422 perf_counter_update_userpage(counter);
1423}
1424
1425static void perf_counter_for_each_sibling(struct perf_counter *counter,
1426 void (*func)(struct perf_counter *))
1427{
1428 struct perf_counter_context *ctx = counter->ctx;
1429 struct perf_counter *sibling;
1430
Peter Zijlstra682076a2009-05-23 18:28:57 +02001431 mutex_lock(&ctx->mutex);
Peter Zijlstra3df5eda2009-05-08 18:52:22 +02001432 counter = counter->group_leader;
1433
1434 func(counter);
1435 list_for_each_entry(sibling, &counter->sibling_list, list_entry)
1436 func(sibling);
Peter Zijlstra682076a2009-05-23 18:28:57 +02001437 mutex_unlock(&ctx->mutex);
Peter Zijlstra3df5eda2009-05-08 18:52:22 +02001438}
1439
1440static void perf_counter_for_each_child(struct perf_counter *counter,
1441 void (*func)(struct perf_counter *))
1442{
1443 struct perf_counter *child;
1444
Peter Zijlstrafccc7142009-05-23 18:28:56 +02001445 mutex_lock(&counter->child_mutex);
Peter Zijlstra3df5eda2009-05-08 18:52:22 +02001446 func(counter);
1447 list_for_each_entry(child, &counter->child_list, child_list)
1448 func(child);
Peter Zijlstrafccc7142009-05-23 18:28:56 +02001449 mutex_unlock(&counter->child_mutex);
Peter Zijlstra3df5eda2009-05-08 18:52:22 +02001450}
1451
1452static void perf_counter_for_each(struct perf_counter *counter,
1453 void (*func)(struct perf_counter *))
1454{
1455 struct perf_counter *child;
1456
Peter Zijlstrafccc7142009-05-23 18:28:56 +02001457 mutex_lock(&counter->child_mutex);
Peter Zijlstra3df5eda2009-05-08 18:52:22 +02001458 perf_counter_for_each_sibling(counter, func);
1459 list_for_each_entry(child, &counter->child_list, child_list)
1460 perf_counter_for_each_sibling(child, func);
Peter Zijlstrafccc7142009-05-23 18:28:56 +02001461 mutex_unlock(&counter->child_mutex);
Peter Zijlstra6de6a7b2009-05-05 17:50:23 +02001462}
1463
Paul Mackerrasd859e292009-01-17 18:10:22 +11001464static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1465{
1466 struct perf_counter *counter = file->private_data;
Peter Zijlstra3df5eda2009-05-08 18:52:22 +02001467 void (*func)(struct perf_counter *);
1468 u32 flags = arg;
Paul Mackerrasd859e292009-01-17 18:10:22 +11001469
1470 switch (cmd) {
1471 case PERF_COUNTER_IOC_ENABLE:
Peter Zijlstra3df5eda2009-05-08 18:52:22 +02001472 func = perf_counter_enable;
Paul Mackerrasd859e292009-01-17 18:10:22 +11001473 break;
1474 case PERF_COUNTER_IOC_DISABLE:
Peter Zijlstra3df5eda2009-05-08 18:52:22 +02001475 func = perf_counter_disable;
Peter Zijlstra79f14642009-04-06 11:45:07 +02001476 break;
Peter Zijlstra6de6a7b2009-05-05 17:50:23 +02001477 case PERF_COUNTER_IOC_RESET:
Peter Zijlstra3df5eda2009-05-08 18:52:22 +02001478 func = perf_counter_reset;
Peter Zijlstra6de6a7b2009-05-05 17:50:23 +02001479 break;
Peter Zijlstra3df5eda2009-05-08 18:52:22 +02001480
1481 case PERF_COUNTER_IOC_REFRESH:
1482 return perf_counter_refresh(counter, arg);
Paul Mackerrasd859e292009-01-17 18:10:22 +11001483 default:
Peter Zijlstra3df5eda2009-05-08 18:52:22 +02001484 return -ENOTTY;
Paul Mackerrasd859e292009-01-17 18:10:22 +11001485 }
Peter Zijlstra3df5eda2009-05-08 18:52:22 +02001486
1487 if (flags & PERF_IOC_FLAG_GROUP)
1488 perf_counter_for_each(counter, func);
1489 else
1490 perf_counter_for_each_child(counter, func);
1491
1492 return 0;
Paul Mackerrasd859e292009-01-17 18:10:22 +11001493}
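/*
 * Usage sketch (illustrative only): for ENABLE/DISABLE/RESET the ioctl
 * argument is interpreted as flags, so user space can act on a whole group
 * or on a single counter (and its inherited children):
 *
 *	ioctl(group_leader_fd, PERF_COUNTER_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
 *	ioctl(counter_fd, PERF_COUNTER_IOC_RESET, 0);
 *	ioctl(counter_fd, PERF_COUNTER_IOC_REFRESH, 1);
 *
 * PERF_COUNTER_IOC_REFRESH takes an event count rather than flags, as handled
 * above.
 */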
1494
Peter Zijlstra771d7cd2009-05-25 14:45:26 +02001495int perf_counter_task_enable(void)
1496{
1497 struct perf_counter *counter;
1498
1499 mutex_lock(&current->perf_counter_mutex);
1500 list_for_each_entry(counter, &current->perf_counter_list, owner_entry)
1501 perf_counter_for_each_child(counter, perf_counter_enable);
1502 mutex_unlock(&current->perf_counter_mutex);
1503
1504 return 0;
1505}
1506
1507int perf_counter_task_disable(void)
1508{
1509 struct perf_counter *counter;
1510
1511 mutex_lock(&current->perf_counter_mutex);
1512 list_for_each_entry(counter, &current->perf_counter_list, owner_entry)
1513 perf_counter_for_each_child(counter, perf_counter_disable);
1514 mutex_unlock(&current->perf_counter_mutex);
1515
1516 return 0;
1517}
1518
Peter Zijlstra38ff6672009-03-30 19:07:03 +02001519/*
1520 * Callers need to ensure there can be no nesting of this function, otherwise
1521 * the seqlock logic goes bad. We cannot serialize this because the arch
1522 * code calls this from NMI context.
1523 */
1524void perf_counter_update_userpage(struct perf_counter *counter)
Paul Mackerras37d81822009-03-23 18:22:08 +01001525{
Peter Zijlstra38ff6672009-03-30 19:07:03 +02001526 struct perf_mmap_data *data;
1527 struct perf_counter_mmap_page *userpg;
1528
1529 rcu_read_lock();
1530 data = rcu_dereference(counter->data);
1531 if (!data)
1532 goto unlock;
1533
1534 userpg = data->user_page;
Paul Mackerras37d81822009-03-23 18:22:08 +01001535
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001536 /*
1537 * Disable preemption so as to not let the corresponding user-space
1538 * spin too long if we get preempted.
1539 */
1540 preempt_disable();
Paul Mackerras37d81822009-03-23 18:22:08 +01001541 ++userpg->lock;
Peter Zijlstra92f22a32009-04-02 11:12:04 +02001542 barrier();
Paul Mackerras37d81822009-03-23 18:22:08 +01001543 userpg->index = counter->hw.idx;
1544 userpg->offset = atomic64_read(&counter->count);
1545 if (counter->state == PERF_COUNTER_STATE_ACTIVE)
1546 userpg->offset -= atomic64_read(&counter->hw.prev_count);
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001547
Peter Zijlstra92f22a32009-04-02 11:12:04 +02001548 barrier();
Paul Mackerras37d81822009-03-23 18:22:08 +01001549 ++userpg->lock;
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001550 preempt_enable();
Peter Zijlstra38ff6672009-03-30 19:07:03 +02001551unlock:
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001552 rcu_read_unlock();
Paul Mackerras37d81822009-03-23 18:22:08 +01001553}
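/*
 * Reader sketch (illustrative only; "pg" is the mmap'ed struct
 * perf_counter_mmap_page): user space pairs with the ++lock/barrier protocol
 * above by re-reading until it observes an unchanged, even sequence count:
 *
 *	u32 seq, idx;
 *	u64 offset;
 *
 *	do {
 *		seq = pg->lock;
 *		barrier();
 *		idx = pg->index;
 *		offset = pg->offset;
 *		barrier();
 *	} while (pg->lock != seq || (seq & 1));
 *
 * If idx indicates a live hardware counter, the reader could add the current
 * hardware value (e.g. rdpmc on x86) to offset; otherwise offset already is
 * the full count.
 */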
1554
1555static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1556{
1557 struct perf_counter *counter = vma->vm_file->private_data;
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001558 struct perf_mmap_data *data;
1559 int ret = VM_FAULT_SIGBUS;
Paul Mackerras37d81822009-03-23 18:22:08 +01001560
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001561 rcu_read_lock();
1562 data = rcu_dereference(counter->data);
1563 if (!data)
1564 goto unlock;
Paul Mackerras37d81822009-03-23 18:22:08 +01001565
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001566 if (vmf->pgoff == 0) {
1567 vmf->page = virt_to_page(data->user_page);
1568 } else {
1569 int nr = vmf->pgoff - 1;
1570
1571 if ((unsigned)nr >= data->nr_pages)
1572 goto unlock;
1573
1574 vmf->page = virt_to_page(data->data_pages[nr]);
1575 }
Paul Mackerras37d81822009-03-23 18:22:08 +01001576 get_page(vmf->page);
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001577 ret = 0;
1578unlock:
1579 rcu_read_unlock();
1580
1581 return ret;
1582}
1583
1584static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
1585{
1586 struct perf_mmap_data *data;
1587 unsigned long size;
1588 int i;
1589
1590 WARN_ON(atomic_read(&counter->mmap_count));
1591
1592 size = sizeof(struct perf_mmap_data);
1593 size += nr_pages * sizeof(void *);
1594
1595 data = kzalloc(size, GFP_KERNEL);
1596 if (!data)
1597 goto fail;
1598
1599 data->user_page = (void *)get_zeroed_page(GFP_KERNEL);
1600 if (!data->user_page)
1601 goto fail_user_page;
1602
1603 for (i = 0; i < nr_pages; i++) {
1604 data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL);
1605 if (!data->data_pages[i])
1606 goto fail_data_pages;
1607 }
1608
1609 data->nr_pages = nr_pages;
Peter Zijlstra22c15582009-05-05 17:50:25 +02001610 atomic_set(&data->lock, -1);
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001611
1612 rcu_assign_pointer(counter->data, data);
1613
Paul Mackerras37d81822009-03-23 18:22:08 +01001614 return 0;
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001615
1616fail_data_pages:
1617 for (i--; i >= 0; i--)
1618 free_page((unsigned long)data->data_pages[i]);
1619
1620 free_page((unsigned long)data->user_page);
1621
1622fail_user_page:
1623 kfree(data);
1624
1625fail:
1626 return -ENOMEM;
1627}
1628
1629static void __perf_mmap_data_free(struct rcu_head *rcu_head)
1630{
1631 struct perf_mmap_data *data = container_of(rcu_head,
1632 struct perf_mmap_data, rcu_head);
1633 int i;
1634
1635 free_page((unsigned long)data->user_page);
1636 for (i = 0; i < data->nr_pages; i++)
1637 free_page((unsigned long)data->data_pages[i]);
1638 kfree(data);
1639}
1640
1641static void perf_mmap_data_free(struct perf_counter *counter)
1642{
1643 struct perf_mmap_data *data = counter->data;
1644
1645 WARN_ON(atomic_read(&counter->mmap_count));
1646
1647 rcu_assign_pointer(counter->data, NULL);
1648 call_rcu(&data->rcu_head, __perf_mmap_data_free);
1649}
1650
1651static void perf_mmap_open(struct vm_area_struct *vma)
1652{
1653 struct perf_counter *counter = vma->vm_file->private_data;
1654
1655 atomic_inc(&counter->mmap_count);
1656}
1657
1658static void perf_mmap_close(struct vm_area_struct *vma)
1659{
1660 struct perf_counter *counter = vma->vm_file->private_data;
1661
1662 if (atomic_dec_and_mutex_lock(&counter->mmap_count,
1663 &counter->mmap_mutex)) {
Peter Zijlstra789f90f2009-05-15 15:19:27 +02001664 struct user_struct *user = current_user();
1665
1666 atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm);
Peter Zijlstrac5078f72009-05-05 17:50:24 +02001667 vma->vm_mm->locked_vm -= counter->data->nr_locked;
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001668 perf_mmap_data_free(counter);
1669 mutex_unlock(&counter->mmap_mutex);
1670 }
Paul Mackerras37d81822009-03-23 18:22:08 +01001671}
1672
1673static struct vm_operations_struct perf_mmap_vmops = {
Peter Zijlstraebb3c4c2009-04-06 11:45:05 +02001674 .open = perf_mmap_open,
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001675 .close = perf_mmap_close,
Paul Mackerras37d81822009-03-23 18:22:08 +01001676 .fault = perf_mmap_fault,
1677};
1678
1679static int perf_mmap(struct file *file, struct vm_area_struct *vma)
1680{
1681 struct perf_counter *counter = file->private_data;
Peter Zijlstra789f90f2009-05-15 15:19:27 +02001682 struct user_struct *user = current_user();
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001683 unsigned long vma_size;
1684 unsigned long nr_pages;
Peter Zijlstra789f90f2009-05-15 15:19:27 +02001685 unsigned long user_locked, user_lock_limit;
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001686 unsigned long locked, lock_limit;
Peter Zijlstra789f90f2009-05-15 15:19:27 +02001687 long user_extra, extra;
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001688 int ret = 0;
Paul Mackerras37d81822009-03-23 18:22:08 +01001689
1690 if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
1691 return -EINVAL;
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001692
1693 vma_size = vma->vm_end - vma->vm_start;
1694 nr_pages = (vma_size / PAGE_SIZE) - 1;
1695
Peter Zijlstra7730d862009-03-25 12:48:31 +01001696 /*
1697 * If we have data pages, ensure they're a power-of-two number, so we
1698 * can do bitmasks instead of modulo.
1699 */
1700 if (nr_pages != 0 && !is_power_of_2(nr_pages))
Paul Mackerras37d81822009-03-23 18:22:08 +01001701 return -EINVAL;
1702
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001703 if (vma_size != PAGE_SIZE * (1 + nr_pages))
Paul Mackerras37d81822009-03-23 18:22:08 +01001704 return -EINVAL;
1705
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001706 if (vma->vm_pgoff != 0)
1707 return -EINVAL;
Paul Mackerras37d81822009-03-23 18:22:08 +01001708
Peter Zijlstraebb3c4c2009-04-06 11:45:05 +02001709 mutex_lock(&counter->mmap_mutex);
1710 if (atomic_inc_not_zero(&counter->mmap_count)) {
1711 if (nr_pages != counter->data->nr_pages)
1712 ret = -EINVAL;
1713 goto unlock;
1714 }
1715
Peter Zijlstra789f90f2009-05-15 15:19:27 +02001716 user_extra = nr_pages + 1;
1717 user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
Ingo Molnara3862d32009-05-24 09:02:37 +02001718
1719 /*
1720 * Increase the limit linearly with more CPUs:
1721 */
1722 user_lock_limit *= num_online_cpus();
1723
Peter Zijlstra789f90f2009-05-15 15:19:27 +02001724 user_locked = atomic_long_read(&user->locked_vm) + user_extra;
Peter Zijlstrac5078f72009-05-05 17:50:24 +02001725
Peter Zijlstra789f90f2009-05-15 15:19:27 +02001726 extra = 0;
1727 if (user_locked > user_lock_limit)
1728 extra = user_locked - user_lock_limit;
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001729
1730 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
1731 lock_limit >>= PAGE_SHIFT;
Peter Zijlstra789f90f2009-05-15 15:19:27 +02001732 locked = vma->vm_mm->locked_vm + extra;
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001733
Peter Zijlstraebb3c4c2009-04-06 11:45:05 +02001734 if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
1735 ret = -EPERM;
1736 goto unlock;
1737 }
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001738
1739 WARN_ON(counter->data);
1740 ret = perf_mmap_data_alloc(counter, nr_pages);
Peter Zijlstraebb3c4c2009-04-06 11:45:05 +02001741 if (ret)
1742 goto unlock;
1743
1744 atomic_set(&counter->mmap_count, 1);
Peter Zijlstra789f90f2009-05-15 15:19:27 +02001745 atomic_long_add(user_extra, &user->locked_vm);
Peter Zijlstrac5078f72009-05-05 17:50:24 +02001746 vma->vm_mm->locked_vm += extra;
1747 counter->data->nr_locked = extra;
Peter Zijlstraebb3c4c2009-04-06 11:45:05 +02001748unlock:
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001749 mutex_unlock(&counter->mmap_mutex);
Paul Mackerras37d81822009-03-23 18:22:08 +01001750
1751 vma->vm_flags &= ~VM_MAYWRITE;
1752 vma->vm_flags |= VM_RESERVED;
1753 vma->vm_ops = &perf_mmap_vmops;
Peter Zijlstra7b732a72009-03-23 18:22:10 +01001754
1755 return ret;
Paul Mackerras37d81822009-03-23 18:22:08 +01001756}
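/*
 * Usage sketch (illustrative only): user space maps 1 + 2^n pages, read-only
 * and shared; page 0 is the perf_counter_mmap_page control page, the rest is
 * the data ring buffer:
 *
 *	long pgsz = sysconf(_SC_PAGESIZE);
 *	int nr_pages = 8;	-- must be a power of two
 *	void *base = mmap(NULL, (1 + nr_pages) * pgsz, PROT_READ,
 *			  MAP_SHARED, counter_fd, 0);
 *
 * base then points at the control page and base + pgsz at the first data
 * page; PROT_WRITE is rejected above, as is a non power-of-two page count,
 * and the total is subject to the mlock accounting checked above.
 */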
1757
Peter Zijlstra3c446b3d2009-04-06 11:45:01 +02001758static int perf_fasync(int fd, struct file *filp, int on)
1759{
1760 struct perf_counter *counter = filp->private_data;
1761 struct inode *inode = filp->f_path.dentry->d_inode;
1762 int retval;
1763
1764 mutex_lock(&inode->i_mutex);
1765 retval = fasync_helper(fd, filp, on, &counter->fasync);
1766 mutex_unlock(&inode->i_mutex);
1767
1768 if (retval < 0)
1769 return retval;
1770
1771 return 0;
1772}
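/*
 * Usage sketch (illustrative only): SIGIO notification is requested the same
 * way as for any other fasync-capable file:
 *
 *	signal(SIGIO, sigio_handler);
 *	fcntl(counter_fd, F_SETOWN, getpid());
 *	fcntl(counter_fd, F_SETFL, fcntl(counter_fd, F_GETFL) | O_ASYNC);
 *
 * counter->pending_kill then provides the band argument (POLL_IN or POLL_HUP)
 * that perf_counter_wakeup() passes to kill_fasync().
 */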
1773
Thomas Gleixner0793a612008-12-04 20:12:29 +01001774static const struct file_operations perf_fops = {
1775 .release = perf_release,
1776 .read = perf_read,
1777 .poll = perf_poll,
Paul Mackerrasd859e292009-01-17 18:10:22 +11001778 .unlocked_ioctl = perf_ioctl,
1779 .compat_ioctl = perf_ioctl,
Paul Mackerras37d81822009-03-23 18:22:08 +01001780 .mmap = perf_mmap,
Peter Zijlstra3c446b3d2009-04-06 11:45:01 +02001781 .fasync = perf_fasync,
Thomas Gleixner0793a612008-12-04 20:12:29 +01001782};
1783
Peter Zijlstra15dbf272009-03-13 12:21:32 +01001784/*
Peter Zijlstra925d5192009-03-30 19:07:02 +02001785 * Perf counter wakeup
1786 *
1787 * If there's data, ensure we set the poll() state and publish everything
1788 * to user-space before waking everybody up.
1789 */
1790
1791void perf_counter_wakeup(struct perf_counter *counter)
1792{
Peter Zijlstra925d5192009-03-30 19:07:02 +02001793 wake_up_all(&counter->waitq);
Peter Zijlstra4c9e2542009-04-06 11:45:09 +02001794
1795 if (counter->pending_kill) {
1796 kill_fasync(&counter->fasync, SIGIO, counter->pending_kill);
1797 counter->pending_kill = 0;
1798 }
Peter Zijlstra925d5192009-03-30 19:07:02 +02001799}
1800
1801/*
1802 * Pending wakeups
1803 *
1804 * Handle the case where we need to wake up from NMI (or rq->lock) context.
1805 *
1806 * The NMI bit means we cannot possibly take locks. Therefore, maintain a
1807 * single linked list and use cmpxchg() to add entries lockless.
1808 */
1809
Peter Zijlstra79f14642009-04-06 11:45:07 +02001810static void perf_pending_counter(struct perf_pending_entry *entry)
1811{
1812 struct perf_counter *counter = container_of(entry,
1813 struct perf_counter, pending);
1814
1815 if (counter->pending_disable) {
1816 counter->pending_disable = 0;
1817 perf_counter_disable(counter);
1818 }
1819
1820 if (counter->pending_wakeup) {
1821 counter->pending_wakeup = 0;
1822 perf_counter_wakeup(counter);
1823 }
1824}
1825
Peter Zijlstra671dec52009-04-06 11:45:02 +02001826#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
Peter Zijlstra925d5192009-03-30 19:07:02 +02001827
Peter Zijlstra671dec52009-04-06 11:45:02 +02001828static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
Peter Zijlstra925d5192009-03-30 19:07:02 +02001829 PENDING_TAIL,
1830};
1831
Peter Zijlstra671dec52009-04-06 11:45:02 +02001832static void perf_pending_queue(struct perf_pending_entry *entry,
1833 void (*func)(struct perf_pending_entry *))
Peter Zijlstra925d5192009-03-30 19:07:02 +02001834{
Peter Zijlstra671dec52009-04-06 11:45:02 +02001835 struct perf_pending_entry **head;
Peter Zijlstra925d5192009-03-30 19:07:02 +02001836
Peter Zijlstra671dec52009-04-06 11:45:02 +02001837 if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
Peter Zijlstra925d5192009-03-30 19:07:02 +02001838 return;
1839
Peter Zijlstra671dec52009-04-06 11:45:02 +02001840 entry->func = func;
1841
1842 head = &get_cpu_var(perf_pending_head);
Peter Zijlstra925d5192009-03-30 19:07:02 +02001843
1844 do {
Peter Zijlstra671dec52009-04-06 11:45:02 +02001845 entry->next = *head;
1846 } while (cmpxchg(head, entry->next, entry) != entry->next);
Peter Zijlstra925d5192009-03-30 19:07:02 +02001847
1848 set_perf_counter_pending();
1849
Peter Zijlstra671dec52009-04-06 11:45:02 +02001850 put_cpu_var(perf_pending_head);
Peter Zijlstra925d5192009-03-30 19:07:02 +02001851}
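/*
 * Illustration: entry->next == NULL means "not queued", which is why
 * PENDING_TAIL rather than NULL terminates the list.  After queueing the
 * pending work of counters A and then B on this CPU, the per-cpu list is:
 *
 *	perf_pending_head -> B.pending -> A.pending -> PENDING_TAIL
 *
 * __perf_pending_run() later xchg()s the head back to PENDING_TAIL and walks
 * the detached chain.
 */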
1852
1853static int __perf_pending_run(void)
1854{
Peter Zijlstra671dec52009-04-06 11:45:02 +02001855 struct perf_pending_entry *list;
Peter Zijlstra925d5192009-03-30 19:07:02 +02001856 int nr = 0;
1857
Peter Zijlstra671dec52009-04-06 11:45:02 +02001858 list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
Peter Zijlstra925d5192009-03-30 19:07:02 +02001859 while (list != PENDING_TAIL) {
Peter Zijlstra671dec52009-04-06 11:45:02 +02001860 void (*func)(struct perf_pending_entry *);
1861 struct perf_pending_entry *entry = list;
Peter Zijlstra925d5192009-03-30 19:07:02 +02001862
1863 list = list->next;
1864
Peter Zijlstra671dec52009-04-06 11:45:02 +02001865 func = entry->func;
1866 entry->next = NULL;
Peter Zijlstra925d5192009-03-30 19:07:02 +02001867 /*
1868 * Ensure we observe the unqueue before we issue the wakeup,
1869 * so that we won't be waiting forever.
1870 * -- see perf_not_pending().
1871 */
1872 smp_wmb();
1873
Peter Zijlstra671dec52009-04-06 11:45:02 +02001874 func(entry);
Peter Zijlstra925d5192009-03-30 19:07:02 +02001875 nr++;
1876 }
1877
1878 return nr;
1879}
1880
1881static inline int perf_not_pending(struct perf_counter *counter)
1882{
1883 /*
1884 * If we flush the pending work on whichever CPU we happen to run on,
1885 * there is a chance we don't need to wait at all.
1886 */
1887 get_cpu();
1888 __perf_pending_run();
1889 put_cpu();
1890
1891 /*
1892 * Ensure we see the proper queue state before going to sleep
1893 * so that we do not miss the wakeup. -- see __perf_pending_run()
1894 */
1895 smp_rmb();
Peter Zijlstra671dec52009-04-06 11:45:02 +02001896 return counter->pending.next == NULL;
Peter Zijlstra925d5192009-03-30 19:07:02 +02001897}
1898
1899static void perf_pending_sync(struct perf_counter *counter)
1900{
1901 wait_event(counter->waitq, perf_not_pending(counter));
1902}
1903
1904void perf_counter_do_pending(void)
1905{
1906 __perf_pending_run();
1907}
1908
1909/*
Peter Zijlstra394ee072009-03-30 19:07:14 +02001910 * Callchain support -- arch specific
1911 */
1912
Peter Zijlstra9c03d882009-04-06 11:45:00 +02001913__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
Peter Zijlstra394ee072009-03-30 19:07:14 +02001914{
1915 return NULL;
1916}
1917
1918/*
Peter Zijlstra0322cd62009-03-19 20:26:19 +01001919 * Output
1920 */
1921
Peter Zijlstrab9cacc72009-03-25 12:30:22 +01001922struct perf_output_handle {
1923 struct perf_counter *counter;
1924 struct perf_mmap_data *data;
1925 unsigned int offset;
Peter Zijlstra63e35b22009-03-25 12:30:24 +01001926 unsigned int head;
Peter Zijlstra78d613e2009-03-30 19:07:11 +02001927 int nmi;
Peter Zijlstra4c9e2542009-04-06 11:45:09 +02001928 int overflow;
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02001929 int locked;
1930 unsigned long flags;
Peter Zijlstrab9cacc72009-03-25 12:30:22 +01001931};
1932
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02001933static void perf_output_wakeup(struct perf_output_handle *handle)
Peter Zijlstra78d613e2009-03-30 19:07:11 +02001934{
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02001935 atomic_set(&handle->data->poll, POLLIN);
1936
Peter Zijlstra671dec52009-04-06 11:45:02 +02001937 if (handle->nmi) {
Peter Zijlstra79f14642009-04-06 11:45:07 +02001938 handle->counter->pending_wakeup = 1;
Peter Zijlstra671dec52009-04-06 11:45:02 +02001939 perf_pending_queue(&handle->counter->pending,
Peter Zijlstra79f14642009-04-06 11:45:07 +02001940 perf_pending_counter);
Peter Zijlstra671dec52009-04-06 11:45:02 +02001941 } else
Peter Zijlstra78d613e2009-03-30 19:07:11 +02001942 perf_counter_wakeup(handle->counter);
1943}
1944
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02001945/*
1946 * Curious locking construct.
1947 *
1948 * We need to ensure a later event doesn't publish a head when a former
1949 * event isn't done writing. However, since we need to deal with NMIs we
1950 * cannot fully serialize things.
1951 *
1952 * What we do is serialize between CPUs so we only have to deal with NMI
1953 * nesting on a single CPU.
1954 *
1955 * We only publish the head (and generate a wakeup) when the outer-most
1956 * event completes.
1957 */
1958static void perf_output_lock(struct perf_output_handle *handle)
1959{
1960 struct perf_mmap_data *data = handle->data;
1961 int cpu;
1962
1963 handle->locked = 0;
1964
1965 local_irq_save(handle->flags);
1966 cpu = smp_processor_id();
1967
1968 if (in_nmi() && atomic_read(&data->lock) == cpu)
1969 return;
1970
Peter Zijlstra22c15582009-05-05 17:50:25 +02001971 while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02001972 cpu_relax();
1973
1974 handle->locked = 1;
1975}
1976
1977static void perf_output_unlock(struct perf_output_handle *handle)
1978{
1979 struct perf_mmap_data *data = handle->data;
1980 int head, cpu;
1981
Peter Zijlstrac66de4a2009-05-05 17:50:22 +02001982 data->done_head = data->head;
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02001983
1984 if (!handle->locked)
1985 goto out;
1986
1987again:
1988 /*
1989 * The xchg implies a full barrier that ensures all writes are done
1990 * before we publish the new head, matched by a rmb() in userspace when
1991 * reading this position.
1992 */
Peter Zijlstrac66de4a2009-05-05 17:50:22 +02001993 while ((head = atomic_xchg(&data->done_head, 0)))
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02001994 data->user_page->data_head = head;
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02001995
1996 /*
Peter Zijlstrac66de4a2009-05-05 17:50:22 +02001997 * NMI can happen here, which means we can miss a done_head update.
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02001998 */
1999
Peter Zijlstra22c15582009-05-05 17:50:25 +02002000 cpu = atomic_xchg(&data->lock, -1);
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02002001 WARN_ON_ONCE(cpu != smp_processor_id());
2002
2003 /*
2004 * Therefore we have to check whether we missed a done_head update.
2005 */
Peter Zijlstrac66de4a2009-05-05 17:50:22 +02002006 if (unlikely(atomic_read(&data->done_head))) {
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02002007 /*
2008 * Since we had it locked, we can lock it again.
2009 */
Peter Zijlstra22c15582009-05-05 17:50:25 +02002010 while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02002011 cpu_relax();
2012
2013 goto again;
2014 }
2015
Peter Zijlstrac66de4a2009-05-05 17:50:22 +02002016 if (atomic_xchg(&data->wakeup, 0))
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02002017 perf_output_wakeup(handle);
2018out:
2019 local_irq_restore(handle->flags);
2020}
2021
Peter Zijlstrab9cacc72009-03-25 12:30:22 +01002022static int perf_output_begin(struct perf_output_handle *handle,
Peter Zijlstra78d613e2009-03-30 19:07:11 +02002023 struct perf_counter *counter, unsigned int size,
Peter Zijlstra4c9e2542009-04-06 11:45:09 +02002024 int nmi, int overflow)
Peter Zijlstra0322cd62009-03-19 20:26:19 +01002025{
Peter Zijlstra7b732a72009-03-23 18:22:10 +01002026 struct perf_mmap_data *data;
Peter Zijlstrab9cacc72009-03-25 12:30:22 +01002027 unsigned int offset, head;
Peter Zijlstra0322cd62009-03-19 20:26:19 +01002028
Peter Zijlstra2023b352009-05-05 17:50:26 +02002029 /*
2030 * For inherited counters we send all the output towards the parent.
2031 */
2032 if (counter->parent)
2033 counter = counter->parent;
2034
Peter Zijlstra7b732a72009-03-23 18:22:10 +01002035 rcu_read_lock();
Peter Zijlstra7b732a72009-03-23 18:22:10 +01002036 data = rcu_dereference(counter->data);
2037 if (!data)
2038 goto out;
Peter Zijlstra0322cd62009-03-19 20:26:19 +01002039
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02002040 handle->data = data;
Peter Zijlstra4c9e2542009-04-06 11:45:09 +02002041 handle->counter = counter;
2042 handle->nmi = nmi;
2043 handle->overflow = overflow;
Peter Zijlstra78d613e2009-03-30 19:07:11 +02002044
Peter Zijlstra7b732a72009-03-23 18:22:10 +01002045 if (!data->nr_pages)
Peter Zijlstra78d613e2009-03-30 19:07:11 +02002046 goto fail;
Peter Zijlstra7b732a72009-03-23 18:22:10 +01002047
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02002048 perf_output_lock(handle);
2049
Peter Zijlstra7b732a72009-03-23 18:22:10 +01002050 do {
2051 offset = head = atomic_read(&data->head);
Peter Zijlstrac7138f32009-03-24 13:18:16 +01002052 head += size;
Peter Zijlstra7b732a72009-03-23 18:22:10 +01002053 } while (atomic_cmpxchg(&data->head, offset, head) != offset);
2054
Peter Zijlstrab9cacc72009-03-25 12:30:22 +01002055 handle->offset = offset;
Peter Zijlstra63e35b22009-03-25 12:30:24 +01002056 handle->head = head;
Peter Zijlstrac66de4a2009-05-05 17:50:22 +02002057
2058 if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT))
2059 atomic_set(&data->wakeup, 1);
Peter Zijlstra7b732a72009-03-23 18:22:10 +01002060
Peter Zijlstrab9cacc72009-03-25 12:30:22 +01002061 return 0;
Peter Zijlstra7b732a72009-03-23 18:22:10 +01002062
Peter Zijlstra78d613e2009-03-30 19:07:11 +02002063fail:
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02002064 perf_output_wakeup(handle);
Peter Zijlstra7b732a72009-03-23 18:22:10 +01002065out:
2066 rcu_read_unlock();
2067
Peter Zijlstrab9cacc72009-03-25 12:30:22 +01002068 return -ENOSPC;
2069}
2070
2071static void perf_output_copy(struct perf_output_handle *handle,
2072 void *buf, unsigned int len)
2073{
2074 unsigned int pages_mask;
2075 unsigned int offset;
2076 unsigned int size;
2077 void **pages;
2078
2079 offset = handle->offset;
2080 pages_mask = handle->data->nr_pages - 1;
2081 pages = handle->data->data_pages;
2082
2083 do {
2084 unsigned int page_offset;
2085 int nr;
2086
2087 nr = (offset >> PAGE_SHIFT) & pages_mask;
2088 page_offset = offset & (PAGE_SIZE - 1);
2089 size = min_t(unsigned int, PAGE_SIZE - page_offset, len);
2090
2091 memcpy(pages[nr] + page_offset, buf, size);
2092
2093 len -= size;
2094 buf += size;
2095 offset += size;
2096 } while (len);
2097
2098 handle->offset = offset;
Peter Zijlstra63e35b22009-03-25 12:30:24 +01002099
Peter Zijlstra53020fe2009-05-13 21:26:19 +02002100 /*
2101 * Check we didn't copy past our reservation window, taking the
2102 * possible unsigned int wrap into account.
2103 */
2104 WARN_ON_ONCE(((int)(handle->head - handle->offset)) < 0);
Peter Zijlstrab9cacc72009-03-25 12:30:22 +01002105}
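/*
 * Worked example (assuming 4KB pages): with 4 data pages, pages_mask == 3.
 * For handle->offset == 0x5230:
 *
 *	nr          = (0x5230 >> PAGE_SHIFT) & 3  = 5 & 3 = 1
 *	page_offset = 0x5230 & (PAGE_SIZE - 1)    = 0x230
 *
 * so the bytes land 0x230 into data_pages[1].  Offsets grow monotonically and
 * wrap purely via the mask, which is why nr_pages must be a power of two.
 */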
2106
Peter Zijlstra5c148192009-03-25 12:30:23 +01002107#define perf_output_put(handle, x) \
2108 perf_output_copy((handle), &(x), sizeof(x))
2109
Peter Zijlstra78d613e2009-03-30 19:07:11 +02002110static void perf_output_end(struct perf_output_handle *handle)
Peter Zijlstrab9cacc72009-03-25 12:30:22 +01002111{
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02002112 struct perf_counter *counter = handle->counter;
2113 struct perf_mmap_data *data = handle->data;
2114
2115 int wakeup_events = counter->hw_event.wakeup_events;
Peter Zijlstrac4578102009-04-02 11:12:01 +02002116
Peter Zijlstra4c9e2542009-04-06 11:45:09 +02002117 if (handle->overflow && wakeup_events) {
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02002118 int events = atomic_inc_return(&data->events);
Peter Zijlstrac4578102009-04-02 11:12:01 +02002119 if (events >= wakeup_events) {
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02002120 atomic_sub(wakeup_events, &data->events);
Peter Zijlstrac66de4a2009-05-05 17:50:22 +02002121 atomic_set(&data->wakeup, 1);
Peter Zijlstrac4578102009-04-02 11:12:01 +02002122 }
Peter Zijlstrac33a0bc2009-05-01 12:23:16 +02002123 }
2124
2125 perf_output_unlock(handle);
Peter Zijlstrab9cacc72009-03-25 12:30:22 +01002126 rcu_read_unlock();
2127}
2128
Peter Zijlstraf6c7d5f2009-04-06 11:45:04 +02002129static void perf_counter_output(struct perf_counter *counter,
Peter Zijlstra78f13e92009-04-08 15:01:33 +02002130 int nmi, struct pt_regs *regs, u64 addr)
Peter Zijlstra7b732a72009-03-23 18:22:10 +01002131{
Peter Zijlstra5ed00412009-03-30 19:07:12 +02002132 int ret;
Peter Zijlstra8a057d82009-04-02 11:11:59 +02002133 u64 record_type = counter->hw_event.record_type;
Peter Zijlstra5ed00412009-03-30 19:07:12 +02002134 struct perf_output_handle handle;
2135 struct perf_event_header header;
2136 u64 ip;
Peter Zijlstra5c148192009-03-25 12:30:23 +01002137 struct {
Peter Zijlstraea5d20c2009-03-25 12:30:25 +01002138 u32 pid, tid;
Peter Zijlstra5ed00412009-03-30 19:07:12 +02002139 } tid_entry;
Peter Zijlstra8a057d82009-04-02 11:11:59 +02002140 struct {
2141 u64 event;
2142 u64 counter;
2143 } group_entry;
Peter Zijlstra394ee072009-03-30 19:07:14 +02002144 struct perf_callchain_entry *callchain = NULL;
2145 int callchain_size = 0;
Peter Zijlstra339f7c92009-04-06 11:45:06 +02002146 u64 time;
Peter Zijlstraf370e1e2009-05-08 18:52:24 +02002147 struct {
2148 u32 cpu, reserved;
2149 } cpu_entry;
Peter Zijlstra7b732a72009-03-23 18:22:10 +01002150
Peter Zijlstra6b6e5482009-04-08 15:01:27 +02002151 header.type = 0;
Peter Zijlstra5ed00412009-03-30 19:07:12 +02002152 header.size = sizeof(header);
Peter Zijlstra7b732a72009-03-23 18:22:10 +01002153
Peter Zijlstra6b6e5482009-04-08 15:01:27 +02002154 header.misc = PERF_EVENT_MISC_OVERFLOW;
Paul Mackerras9d23a902009-05-14 21:48:08 +10002155 header.misc |= perf_misc_flags(regs);
Peter Zijlstra6fab0192009-04-08 15:01:26 +02002156
Peter Zijlstra8a057d82009-04-02 11:11:59 +02002157 if (record_type & PERF_RECORD_IP) {
Paul Mackerras9d23a902009-05-14 21:48:08 +10002158 ip = perf_instruction_pointer(regs);
Peter Zijlstra6b6e5482009-04-08 15:01:27 +02002159 header.type |= PERF_RECORD_IP;
Peter Zijlstra8a057d82009-04-02 11:11:59 +02002160 header.size += sizeof(ip);
2161 }
Peter Zijlstraea5d20c2009-03-25 12:30:25 +01002162
Peter Zijlstra8a057d82009-04-02 11:11:59 +02002163 if (record_type & PERF_RECORD_TID) {
Peter Zijlstraea5d20c2009-03-25 12:30:25 +01002164 /* namespace issues */
Peter Zijlstra5ed00412009-03-30 19:07:12 +02002165 tid_entry.pid = current->group_leader->pid;
2166 tid_entry.tid = current->pid;
Peter Zijlstraea5d20c2009-03-25 12:30:25 +01002167
Peter Zijlstra6b6e5482009-04-08 15:01:27 +02002168 header.type |= PERF_RECORD_TID;
Peter Zijlstra5ed00412009-03-30 19:07:12 +02002169 header.size += sizeof(tid_entry);
2170 }
Peter Zijlstraea5d20c2009-03-25 12:30:25 +01002171
Peter Zijlstra4d855452009-04-08 15:01:32 +02002172 if (record_type & PERF_RECORD_TIME) {
2173 /*
2174 * Maybe do better on x86 and provide cpu_clock_nmi()
2175 */
2176 time = sched_clock();
2177
2178 header.type |= PERF_RECORD_TIME;
2179 header.size += sizeof(u64);
2180 }
2181
Peter Zijlstra78f13e92009-04-08 15:01:33 +02002182 if (record_type & PERF_RECORD_ADDR) {
2183 header.type |= PERF_RECORD_ADDR;
2184 header.size += sizeof(u64);
2185 }
2186
Peter Zijlstraa85f61a2009-05-08 18:52:23 +02002187 if (record_type & PERF_RECORD_CONFIG) {
2188 header.type |= PERF_RECORD_CONFIG;
2189 header.size += sizeof(u64);
2190 }
2191
Peter Zijlstraf370e1e2009-05-08 18:52:24 +02002192 if (record_type & PERF_RECORD_CPU) {
2193 header.type |= PERF_RECORD_CPU;
2194 header.size += sizeof(cpu_entry);
2195
2196 cpu_entry.cpu = raw_smp_processor_id();
2197 }
2198
Peter Zijlstra8a057d82009-04-02 11:11:59 +02002199 if (record_type & PERF_RECORD_GROUP) {
Peter Zijlstra6b6e5482009-04-08 15:01:27 +02002200 header.type |= PERF_RECORD_GROUP;
Peter Zijlstra8a057d82009-04-02 11:11:59 +02002201 header.size += sizeof(u64) +
2202 counter->nr_siblings * sizeof(group_entry);
2203 }
2204
2205 if (record_type & PERF_RECORD_CALLCHAIN) {
Peter Zijlstra394ee072009-03-30 19:07:14 +02002206 callchain = perf_callchain(regs);
2207
2208 if (callchain) {
Peter Zijlstra9c03d882009-04-06 11:45:00 +02002209 callchain_size = (1 + callchain->nr) * sizeof(u64);
Peter Zijlstra394ee072009-03-30 19:07:14 +02002210
Peter Zijlstra6b6e5482009-04-08 15:01:27 +02002211 header.type |= PERF_RECORD_CALLCHAIN;
Peter Zijlstra394ee072009-03-30 19:07:14 +02002212 header.size += callchain_size;
2213 }
2214 }
2215
Peter Zijlstra4c9e2542009-04-06 11:45:09 +02002216 ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
Peter Zijlstra5ed00412009-03-30 19:07:12 +02002217 if (ret)
2218 return;
Peter Zijlstraea5d20c2009-03-25 12:30:25 +01002219
Peter Zijlstra5ed00412009-03-30 19:07:12 +02002220 perf_output_put(&handle, header);
Peter Zijlstra5ed00412009-03-30 19:07:12 +02002221
Peter Zijlstra8a057d82009-04-02 11:11:59 +02002222 if (record_type & PERF_RECORD_IP)
2223 perf_output_put(&handle, ip);
2224
2225 if (record_type & PERF_RECORD_TID)
Peter Zijlstra5ed00412009-03-30 19:07:12 +02002226 perf_output_put(&handle, tid_entry);
2227
Peter Zijlstra4d855452009-04-08 15:01:32 +02002228 if (record_type & PERF_RECORD_TIME)
2229 perf_output_put(&handle, time);
2230
Peter Zijlstra78f13e92009-04-08 15:01:33 +02002231 if (record_type & PERF_RECORD_ADDR)
2232 perf_output_put(&handle, addr);
2233
Peter Zijlstraa85f61a2009-05-08 18:52:23 +02002234 if (record_type & PERF_RECORD_CONFIG)
2235 perf_output_put(&handle, counter->hw_event.config);
2236
Peter Zijlstraf370e1e2009-05-08 18:52:24 +02002237 if (record_type & PERF_RECORD_CPU)
2238 perf_output_put(&handle, cpu_entry);
2239
Peter Zijlstra2023b352009-05-05 17:50:26 +02002240 /*
2241 * XXX PERF_RECORD_GROUP vs inherited counters seems difficult.
2242 */
Peter Zijlstra8a057d82009-04-02 11:11:59 +02002243 if (record_type & PERF_RECORD_GROUP) {
2244 struct perf_counter *leader, *sub;
2245 u64 nr = counter->nr_siblings;
2246
2247 perf_output_put(&handle, nr);
2248
2249 leader = counter->group_leader;
2250 list_for_each_entry(sub, &leader->sibling_list, list_entry) {
2251 if (sub != counter)
Robert Richter4aeb0b42009-04-29 12:47:03 +02002252 sub->pmu->read(sub);
Peter Zijlstra8a057d82009-04-02 11:11:59 +02002253
2254 group_entry.event = sub->hw_event.config;
2255 group_entry.counter = atomic64_read(&sub->count);
2256
2257 perf_output_put(&handle, group_entry);
2258 }
2259 }
2260
Peter Zijlstra394ee072009-03-30 19:07:14 +02002261 if (callchain)
2262 perf_output_copy(&handle, callchain, callchain_size);
2263
Peter Zijlstra5ed00412009-03-30 19:07:12 +02002264 perf_output_end(&handle);
Peter Zijlstra7b732a72009-03-23 18:22:10 +01002265}
2266
Peter Zijlstra0322cd62009-03-19 20:26:19 +01002267/*
Peter Zijlstra8d1b2d92009-04-08 15:01:30 +02002268 * comm tracking
2269 */
2270
2271struct perf_comm_event {
2272 struct task_struct *task;
2273 char *comm;
2274 int comm_size;
2275
2276 struct {
2277 struct perf_event_header header;
2278
2279 u32 pid;
2280 u32 tid;
2281 } event;
2282};
2283
2284static void perf_counter_comm_output(struct perf_counter *counter,
2285 struct perf_comm_event *comm_event)
2286{
2287 struct perf_output_handle handle;
2288 int size = comm_event->event.header.size;
2289 int ret = perf_output_begin(&handle, counter, size, 0, 0);
2290
2291 if (ret)
2292 return;
2293
2294 perf_output_put(&handle, comm_event->event);
2295 perf_output_copy(&handle, comm_event->comm,
2296 comm_event->comm_size);
2297 perf_output_end(&handle);
2298}
2299
2300static int perf_counter_comm_match(struct perf_counter *counter,
2301 struct perf_comm_event *comm_event)
2302{
2303 if (counter->hw_event.comm &&
2304 comm_event->event.header.type == PERF_EVENT_COMM)
2305 return 1;
2306
2307 return 0;
2308}
2309
2310static void perf_counter_comm_ctx(struct perf_counter_context *ctx,
2311 struct perf_comm_event *comm_event)
2312{
2313 struct perf_counter *counter;
2314
2315 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
2316 return;
2317
2318 rcu_read_lock();
2319 list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
2320 if (perf_counter_comm_match(counter, comm_event))
2321 perf_counter_comm_output(counter, comm_event);
2322 }
2323 rcu_read_unlock();
2324}
2325
2326static void perf_counter_comm_event(struct perf_comm_event *comm_event)
2327{
2328 struct perf_cpu_context *cpuctx;
2329 unsigned int size;
2330 char *comm = comm_event->task->comm;
2331
Ingo Molnar888fcee2009-04-09 09:48:22 +02002332 size = ALIGN(strlen(comm)+1, sizeof(u64));
Peter Zijlstra8d1b2d92009-04-08 15:01:30 +02002333
2334 comm_event->comm = comm;
2335 comm_event->comm_size = size;
2336
2337 comm_event->event.header.size = sizeof(comm_event->event) + size;
2338
2339 cpuctx = &get_cpu_var(perf_cpu_context);
2340 perf_counter_comm_ctx(&cpuctx->ctx, comm_event);
2341 put_cpu_var(perf_cpu_context);
2342
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10002343 perf_counter_comm_ctx(current->perf_counter_ctxp, comm_event);
Peter Zijlstra8d1b2d92009-04-08 15:01:30 +02002344}
2345
2346void perf_counter_comm(struct task_struct *task)
2347{
Peter Zijlstra9ee318a2009-04-09 10:53:44 +02002348 struct perf_comm_event comm_event;
2349
2350 if (!atomic_read(&nr_comm_tracking))
2351 return;
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10002352 if (!current->perf_counter_ctxp)
2353 return;
2354
Peter Zijlstra9ee318a2009-04-09 10:53:44 +02002355 comm_event = (struct perf_comm_event){
Peter Zijlstra8d1b2d92009-04-08 15:01:30 +02002356 .task = task,
2357 .event = {
2358 .header = { .type = PERF_EVENT_COMM, },
2359 .pid = task->group_leader->pid,
2360 .tid = task->pid,
2361 },
2362 };
2363
2364 perf_counter_comm_event(&comm_event);
2365}
2366
2367/*
Peter Zijlstra0a4a9392009-03-30 19:07:05 +02002368 * mmap tracking
2369 */
2370
2371struct perf_mmap_event {
2372 struct file *file;
2373 char *file_name;
2374 int file_size;
2375
2376 struct {
2377 struct perf_event_header header;
2378
2379 u32 pid;
2380 u32 tid;
2381 u64 start;
2382 u64 len;
2383 u64 pgoff;
2384 } event;
2385};
2386
2387static void perf_counter_mmap_output(struct perf_counter *counter,
2388 struct perf_mmap_event *mmap_event)
2389{
2390 struct perf_output_handle handle;
2391 int size = mmap_event->event.header.size;
Peter Zijlstra4c9e2542009-04-06 11:45:09 +02002392 int ret = perf_output_begin(&handle, counter, size, 0, 0);
Peter Zijlstra0a4a9392009-03-30 19:07:05 +02002393
2394 if (ret)
2395 return;
2396
2397 perf_output_put(&handle, mmap_event->event);
2398 perf_output_copy(&handle, mmap_event->file_name,
2399 mmap_event->file_size);
Peter Zijlstra78d613e2009-03-30 19:07:11 +02002400 perf_output_end(&handle);
Peter Zijlstra0a4a9392009-03-30 19:07:05 +02002401}
2402
2403static int perf_counter_mmap_match(struct perf_counter *counter,
2404 struct perf_mmap_event *mmap_event)
2405{
2406 if (counter->hw_event.mmap &&
2407 mmap_event->event.header.type == PERF_EVENT_MMAP)
2408 return 1;
2409
2410 if (counter->hw_event.munmap &&
2411 mmap_event->event.header.type == PERF_EVENT_MUNMAP)
2412 return 1;
2413
2414 return 0;
2415}
2416
2417static void perf_counter_mmap_ctx(struct perf_counter_context *ctx,
2418 struct perf_mmap_event *mmap_event)
2419{
2420 struct perf_counter *counter;
2421
2422 if (!ctx || system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
2423 return;
2424
2425 rcu_read_lock();
2426 list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
2427 if (perf_counter_mmap_match(counter, mmap_event))
2428 perf_counter_mmap_output(counter, mmap_event);
2429 }
2430 rcu_read_unlock();
2431}
2432
2433static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
2434{
2435 struct perf_cpu_context *cpuctx;
2436 struct file *file = mmap_event->file;
2437 unsigned int size;
2438 char tmp[16];
2439 char *buf = NULL;
2440 char *name;
2441
2442 if (file) {
2443 buf = kzalloc(PATH_MAX, GFP_KERNEL);
2444 if (!buf) {
2445 name = strncpy(tmp, "//enomem", sizeof(tmp));
2446 goto got_name;
2447 }
Peter Zijlstrad3d21c42009-04-09 10:53:46 +02002448 name = d_path(&file->f_path, buf, PATH_MAX);
Peter Zijlstra0a4a9392009-03-30 19:07:05 +02002449 if (IS_ERR(name)) {
2450 name = strncpy(tmp, "//toolong", sizeof(tmp));
2451 goto got_name;
2452 }
2453 } else {
2454 name = strncpy(tmp, "//anon", sizeof(tmp));
2455 goto got_name;
2456 }
2457
2458got_name:
Ingo Molnar888fcee2009-04-09 09:48:22 +02002459 size = ALIGN(strlen(name)+1, sizeof(u64));
Peter Zijlstra0a4a9392009-03-30 19:07:05 +02002460
2461 mmap_event->file_name = name;
2462 mmap_event->file_size = size;
2463
2464 mmap_event->event.header.size = sizeof(mmap_event->event) + size;
2465
2466 cpuctx = &get_cpu_var(perf_cpu_context);
2467 perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event);
2468 put_cpu_var(perf_cpu_context);
2469
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10002470 perf_counter_mmap_ctx(current->perf_counter_ctxp, mmap_event);
Peter Zijlstra0a4a9392009-03-30 19:07:05 +02002471
2472 kfree(buf);
2473}
2474
2475void perf_counter_mmap(unsigned long addr, unsigned long len,
2476 unsigned long pgoff, struct file *file)
2477{
Peter Zijlstra9ee318a2009-04-09 10:53:44 +02002478 struct perf_mmap_event mmap_event;
2479
2480 if (!atomic_read(&nr_mmap_tracking))
2481 return;
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10002482 if (!current->perf_counter_ctxp)
2483 return;
Peter Zijlstra9ee318a2009-04-09 10:53:44 +02002484
2485 mmap_event = (struct perf_mmap_event){
Peter Zijlstra0a4a9392009-03-30 19:07:05 +02002486 .file = file,
2487 .event = {
2488 .header = { .type = PERF_EVENT_MMAP, },
2489 .pid = current->group_leader->pid,
2490 .tid = current->pid,
2491 .start = addr,
2492 .len = len,
2493 .pgoff = pgoff,
2494 },
2495 };
2496
2497 perf_counter_mmap_event(&mmap_event);
2498}
2499
2500void perf_counter_munmap(unsigned long addr, unsigned long len,
2501 unsigned long pgoff, struct file *file)
2502{
Peter Zijlstra9ee318a2009-04-09 10:53:44 +02002503 struct perf_mmap_event mmap_event;
2504
2505 if (!atomic_read(&nr_munmap_tracking))
2506 return;
2507
2508 mmap_event = (struct perf_mmap_event){
Peter Zijlstra0a4a9392009-03-30 19:07:05 +02002509 .file = file,
2510 .event = {
2511 .header = { .type = PERF_EVENT_MUNMAP, },
2512 .pid = current->group_leader->pid,
2513 .tid = current->pid,
2514 .start = addr,
2515 .len = len,
2516 .pgoff = pgoff,
2517 },
2518 };
2519
2520 perf_counter_mmap_event(&mmap_event);
2521}
2522
2523/*
Peter Zijlstrae220d2d2009-05-23 18:28:55 +02002524 * Log irq_period changes so that analyzing tools can re-normalize the
2525 * event flow.
Peter Zijlstra26b119b2009-05-20 12:21:20 +02002526 */
2527
2528static void perf_log_period(struct perf_counter *counter, u64 period)
2529{
2530 struct perf_output_handle handle;
2531 int ret;
2532
2533 struct {
2534 struct perf_event_header header;
2535 u64 time;
2536 u64 period;
2537 } freq_event = {
2538 .header = {
2539 .type = PERF_EVENT_PERIOD,
2540 .misc = 0,
2541 .size = sizeof(freq_event),
2542 },
2543 .time = sched_clock(),
2544 .period = period,
2545 };
2546
2547 if (counter->hw.irq_period == period)
2548 return;
2549
2550 ret = perf_output_begin(&handle, counter, sizeof(freq_event), 0, 0);
2551 if (ret)
2552 return;
2553
2554 perf_output_put(&handle, freq_event);
2555 perf_output_end(&handle);
2556}
2557
2558/*
Peter Zijlstraa78ac322009-05-25 17:39:05 +02002559 * IRQ throttle logging
2560 */
2561
2562static void perf_log_throttle(struct perf_counter *counter, int enable)
2563{
2564 struct perf_output_handle handle;
2565 int ret;
2566
2567 struct {
2568 struct perf_event_header header;
2569 u64 time;
2570 } throttle_event = {
2571 .header = {
2572 .type = PERF_EVENT_THROTTLE + 1,
2573 .misc = 0,
2574 .size = sizeof(throttle_event),
2575 },
2576 .time = sched_clock(),
2577 };
2578
Ingo Molnar0127c3e2009-05-25 22:03:26 +02002579 ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0);
Peter Zijlstraa78ac322009-05-25 17:39:05 +02002580 if (ret)
2581 return;
2582
2583 perf_output_put(&handle, throttle_event);
2584 perf_output_end(&handle);
2585}
2586
2587/*
Peter Zijlstraf6c7d5f2009-04-06 11:45:04 +02002588 * Generic counter overflow handling.
2589 */
2590
2591int perf_counter_overflow(struct perf_counter *counter,
Peter Zijlstra78f13e92009-04-08 15:01:33 +02002592 int nmi, struct pt_regs *regs, u64 addr)
Peter Zijlstraf6c7d5f2009-04-06 11:45:04 +02002593{
Peter Zijlstra79f14642009-04-06 11:45:07 +02002594 int events = atomic_read(&counter->event_limit);
Peter Zijlstraa78ac322009-05-25 17:39:05 +02002595 int throttle = counter->pmu->unthrottle != NULL;
Peter Zijlstra79f14642009-04-06 11:45:07 +02002596 int ret = 0;
2597
Peter Zijlstraa78ac322009-05-25 17:39:05 +02002598 if (!throttle) {
2599 counter->hw.interrupts++;
2600 } else if (counter->hw.interrupts != MAX_INTERRUPTS) {
2601 counter->hw.interrupts++;
2602 if (HZ*counter->hw.interrupts > (u64)sysctl_perf_counter_limit) {
2603 counter->hw.interrupts = MAX_INTERRUPTS;
2604 perf_log_throttle(counter, 0);
2605 ret = 1;
2606 }
2607 }
Peter Zijlstra60db5e02009-05-15 15:19:28 +02002608
Peter Zijlstra2023b352009-05-05 17:50:26 +02002609 /*
2610 * XXX event_limit might not quite work as expected on inherited
2611 * counters
2612 */
2613
Peter Zijlstra4c9e2542009-04-06 11:45:09 +02002614 counter->pending_kill = POLL_IN;
Peter Zijlstra79f14642009-04-06 11:45:07 +02002615 if (events && atomic_dec_and_test(&counter->event_limit)) {
2616 ret = 1;
Peter Zijlstra4c9e2542009-04-06 11:45:09 +02002617 counter->pending_kill = POLL_HUP;
Peter Zijlstra79f14642009-04-06 11:45:07 +02002618 if (nmi) {
2619 counter->pending_disable = 1;
2620 perf_pending_queue(&counter->pending,
2621 perf_pending_counter);
2622 } else
2623 perf_counter_disable(counter);
2624 }
2625
Peter Zijlstra78f13e92009-04-08 15:01:33 +02002626 perf_counter_output(counter, nmi, regs, addr);
Peter Zijlstra79f14642009-04-06 11:45:07 +02002627 return ret;
Peter Zijlstraf6c7d5f2009-04-06 11:45:04 +02002628}
2629
2630/*
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002631 * Generic software counter infrastructure
2632 */
2633
2634static void perf_swcounter_update(struct perf_counter *counter)
2635{
2636 struct hw_perf_counter *hwc = &counter->hw;
2637 u64 prev, now;
2638 s64 delta;
2639
2640again:
2641 prev = atomic64_read(&hwc->prev_count);
2642 now = atomic64_read(&hwc->count);
2643 if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev)
2644 goto again;
2645
2646 delta = now - prev;
2647
2648 atomic64_add(delta, &counter->count);
2649 atomic64_sub(delta, &hwc->period_left);
2650}
2651
2652static void perf_swcounter_set_period(struct perf_counter *counter)
2653{
2654 struct hw_perf_counter *hwc = &counter->hw;
2655 s64 left = atomic64_read(&hwc->period_left);
2656 s64 period = hwc->irq_period;
2657
2658 if (unlikely(left <= -period)) {
2659 left = period;
2660 atomic64_set(&hwc->period_left, left);
2661 }
2662
2663 if (unlikely(left <= 0)) {
2664 left += period;
2665 atomic64_add(period, &hwc->period_left);
2666 }
2667
2668 atomic64_set(&hwc->prev_count, -left);
2669 atomic64_set(&hwc->count, -left);
2670}
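/*
 * Worked example: with irq_period == 1000 and period_left == -200 (the
 * previous period overshot by 200 events), left becomes 800, period_left is
 * advanced to 800, and count/prev_count are primed to -800, so
 * perf_swcounter_add() sees the sum turn non-negative -- and thus overflow --
 * after 800 more events.
 */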
2671
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002672static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
2673{
Peter Zijlstraf6c7d5f2009-04-06 11:45:04 +02002674 enum hrtimer_restart ret = HRTIMER_RESTART;
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002675 struct perf_counter *counter;
2676 struct pt_regs *regs;
Peter Zijlstra60db5e02009-05-15 15:19:28 +02002677 u64 period;
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002678
2679 counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
Robert Richter4aeb0b42009-04-29 12:47:03 +02002680 counter->pmu->read(counter);
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002681
2682 regs = get_irq_regs();
2683 /*
2684 * In case we exclude kernel IPs or are somehow not in interrupt
2685 * context, provide the next best thing, the user IP.
2686 */
2687 if ((counter->hw_event.exclude_kernel || !regs) &&
2688 !counter->hw_event.exclude_user)
2689 regs = task_pt_regs(current);
2690
Peter Zijlstraf6c7d5f2009-04-06 11:45:04 +02002691 if (regs) {
Peter Zijlstra78f13e92009-04-08 15:01:33 +02002692 if (perf_counter_overflow(counter, 0, regs, 0))
Peter Zijlstraf6c7d5f2009-04-06 11:45:04 +02002693 ret = HRTIMER_NORESTART;
2694 }
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002695
Peter Zijlstra60db5e02009-05-15 15:19:28 +02002696 period = max_t(u64, 10000, counter->hw.irq_period);
2697 hrtimer_forward_now(hrtimer, ns_to_ktime(period));
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002698
Peter Zijlstraf6c7d5f2009-04-06 11:45:04 +02002699 return ret;
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002700}
2701
2702static void perf_swcounter_overflow(struct perf_counter *counter,
Peter Zijlstra78f13e92009-04-08 15:01:33 +02002703 int nmi, struct pt_regs *regs, u64 addr)
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002704{
Peter Zijlstrab8e83512009-03-19 20:26:18 +01002705 perf_swcounter_update(counter);
2706 perf_swcounter_set_period(counter);
Peter Zijlstra78f13e92009-04-08 15:01:33 +02002707 if (perf_counter_overflow(counter, nmi, regs, addr))
Peter Zijlstraf6c7d5f2009-04-06 11:45:04 +02002708 /* soft-disable the counter */
2709 ;
2710
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002711}
2712
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002713static int perf_swcounter_match(struct perf_counter *counter,
Peter Zijlstrab8e83512009-03-19 20:26:18 +01002714 enum perf_event_types type,
2715 u32 event, struct pt_regs *regs)
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002716{
2717 if (counter->state != PERF_COUNTER_STATE_ACTIVE)
2718 return 0;
2719
Peter Zijlstraf4a2deb2009-03-23 18:22:06 +01002720 if (perf_event_raw(&counter->hw_event))
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002721 return 0;
2722
Peter Zijlstraf4a2deb2009-03-23 18:22:06 +01002723 if (perf_event_type(&counter->hw_event) != type)
Peter Zijlstrab8e83512009-03-19 20:26:18 +01002724 return 0;
2725
Peter Zijlstraf4a2deb2009-03-23 18:22:06 +01002726 if (perf_event_id(&counter->hw_event) != event)
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002727 return 0;
2728
2729 if (counter->hw_event.exclude_user && user_mode(regs))
2730 return 0;
2731
2732 if (counter->hw_event.exclude_kernel && !user_mode(regs))
2733 return 0;
2734
2735 return 1;
2736}
2737
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002738static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
Peter Zijlstra78f13e92009-04-08 15:01:33 +02002739 int nmi, struct pt_regs *regs, u64 addr)
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002740{
2741 int neg = atomic64_add_negative(nr, &counter->hw.count);
2742 if (counter->hw.irq_period && !neg)
Peter Zijlstra78f13e92009-04-08 15:01:33 +02002743 perf_swcounter_overflow(counter, nmi, regs, addr);
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002744}
2745
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002746static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
Peter Zijlstrab8e83512009-03-19 20:26:18 +01002747 enum perf_event_types type, u32 event,
Peter Zijlstra78f13e92009-04-08 15:01:33 +02002748 u64 nr, int nmi, struct pt_regs *regs,
2749 u64 addr)
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002750{
2751 struct perf_counter *counter;
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002752
Peter Zijlstra01ef09d2009-03-19 20:26:11 +01002753 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002754 return;
2755
Peter Zijlstra592903c2009-03-13 12:21:36 +01002756 rcu_read_lock();
2757 list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
Peter Zijlstrab8e83512009-03-19 20:26:18 +01002758 if (perf_swcounter_match(counter, type, event, regs))
Peter Zijlstra78f13e92009-04-08 15:01:33 +02002759 perf_swcounter_add(counter, nr, nmi, regs, addr);
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002760 }
Peter Zijlstra592903c2009-03-13 12:21:36 +01002761 rcu_read_unlock();
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002762}
2763
Peter Zijlstra96f6d442009-03-23 18:22:07 +01002764static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
2765{
2766 if (in_nmi())
2767 return &cpuctx->recursion[3];
2768
2769 if (in_irq())
2770 return &cpuctx->recursion[2];
2771
2772 if (in_softirq())
2773 return &cpuctx->recursion[1];
2774
2775 return &cpuctx->recursion[0];
2776}
2777
Peter Zijlstrab8e83512009-03-19 20:26:18 +01002778static void __perf_swcounter_event(enum perf_event_types type, u32 event,
Peter Zijlstra78f13e92009-04-08 15:01:33 +02002779 u64 nr, int nmi, struct pt_regs *regs,
2780 u64 addr)
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002781{
2782 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
Peter Zijlstra96f6d442009-03-23 18:22:07 +01002783 int *recursion = perf_swcounter_recursion_context(cpuctx);
2784
2785 if (*recursion)
2786 goto out;
2787
2788 (*recursion)++;
2789 barrier();
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002790
Peter Zijlstra78f13e92009-04-08 15:01:33 +02002791 perf_swcounter_ctx_event(&cpuctx->ctx, type, event,
2792 nr, nmi, regs, addr);
Peter Zijlstrab8e83512009-03-19 20:26:18 +01002793 if (cpuctx->task_ctx) {
2794 perf_swcounter_ctx_event(cpuctx->task_ctx, type, event,
Peter Zijlstra78f13e92009-04-08 15:01:33 +02002795 nr, nmi, regs, addr);
Peter Zijlstrab8e83512009-03-19 20:26:18 +01002796 }
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002797
Peter Zijlstra96f6d442009-03-23 18:22:07 +01002798 barrier();
2799 (*recursion)--;
2800
2801out:
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002802 put_cpu_var(perf_cpu_context);
2803}
2804
Peter Zijlstra78f13e92009-04-08 15:01:33 +02002805void
2806perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
Peter Zijlstrab8e83512009-03-19 20:26:18 +01002807{
Peter Zijlstra78f13e92009-04-08 15:01:33 +02002808 __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr);
Peter Zijlstrab8e83512009-03-19 20:26:18 +01002809}
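/*
 * Call-site sketch (illustrative only; the event id below is an assumed
 * example, not a call made in this file): core kernel or architecture code
 * reports one occurrence of a software event like:
 *
 *	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address);
 *
 * passing nmi == 1 when called from NMI context, so that the overflow path
 * defers its wakeup via the pending queue instead of taking locks.
 */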
2810
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002811static void perf_swcounter_read(struct perf_counter *counter)
2812{
2813 perf_swcounter_update(counter);
2814}
2815
2816static int perf_swcounter_enable(struct perf_counter *counter)
2817{
2818 perf_swcounter_set_period(counter);
2819 return 0;
2820}
2821
2822static void perf_swcounter_disable(struct perf_counter *counter)
2823{
2824 perf_swcounter_update(counter);
2825}
2826
Robert Richter4aeb0b42009-04-29 12:47:03 +02002827static const struct pmu perf_ops_generic = {
Peter Zijlstraac17dc82009-03-13 12:21:34 +01002828 .enable = perf_swcounter_enable,
2829 .disable = perf_swcounter_disable,
2830 .read = perf_swcounter_read,
2831};
2832
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002833/*
2834 * Software counter: cpu wall time clock
2835 */
2836
Paul Mackerras9abf8a02009-01-09 16:26:43 +11002837static void cpu_clock_perf_counter_update(struct perf_counter *counter)
2838{
2839 int cpu = raw_smp_processor_id();
2840 s64 prev;
2841 u64 now;
2842
2843 now = cpu_clock(cpu);
2844 prev = atomic64_read(&counter->hw.prev_count);
2845 atomic64_set(&counter->hw.prev_count, now);
2846 atomic64_add(now - prev, &counter->count);
2847}
2848
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002849static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
2850{
2851 struct hw_perf_counter *hwc = &counter->hw;
2852 int cpu = raw_smp_processor_id();
2853
2854 atomic64_set(&hwc->prev_count, cpu_clock(cpu));
Peter Zijlstra039fc912009-03-13 16:43:47 +01002855 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2856 hwc->hrtimer.function = perf_swcounter_hrtimer;
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002857 if (hwc->irq_period) {
Peter Zijlstra60db5e02009-05-15 15:19:28 +02002858 u64 period = max_t(u64, 10000, hwc->irq_period);
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002859 __hrtimer_start_range_ns(&hwc->hrtimer,
Peter Zijlstra60db5e02009-05-15 15:19:28 +02002860 ns_to_ktime(period), 0,
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002861 HRTIMER_MODE_REL, 0);
2862 }
2863
2864 return 0;
2865}
2866
Ingo Molnar5c92d122008-12-11 13:21:10 +01002867static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
2868{
Peter Zijlstrab986d7e2009-05-20 12:21:21 +02002869 if (counter->hw.irq_period)
2870 hrtimer_cancel(&counter->hw.hrtimer);
Paul Mackerras9abf8a02009-01-09 16:26:43 +11002871 cpu_clock_perf_counter_update(counter);
Ingo Molnar5c92d122008-12-11 13:21:10 +01002872}
2873
2874static void cpu_clock_perf_counter_read(struct perf_counter *counter)
2875{
Paul Mackerras9abf8a02009-01-09 16:26:43 +11002876 cpu_clock_perf_counter_update(counter);
Ingo Molnar5c92d122008-12-11 13:21:10 +01002877}
2878
Robert Richter4aeb0b42009-04-29 12:47:03 +02002879static const struct pmu perf_ops_cpu_clock = {
Ingo Molnar76715812008-12-17 14:20:28 +01002880 .enable = cpu_clock_perf_counter_enable,
2881 .disable = cpu_clock_perf_counter_disable,
2882 .read = cpu_clock_perf_counter_read,
Ingo Molnar5c92d122008-12-11 13:21:10 +01002883};
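/*
 * Sampling for this pmu comes from the hrtimer armed in the enable path;
 * note the period is clamped to at least 10000 ns, so even a tiny
 * irq_period fires at most ~100000 times per second per cpu.  The count
 * itself is just elapsed per-cpu time, in effect:
 *
 *        now = cpu_clock(cpu);            // per-cpu nanosecond clock
 *        counter->count += now - hw.prev_count;
 *        hw.prev_count = now;
 */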
2884
Ingo Molnaraa9c4c02008-12-17 14:10:57 +01002885/*
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002886 * Software counter: task time clock
2887 */
2888
Peter Zijlstrae30e08f2009-04-08 15:01:25 +02002889static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now)
Ingo Molnarbae43c92008-12-11 14:03:20 +01002890{
Peter Zijlstrae30e08f2009-04-08 15:01:25 +02002891 u64 prev;
Ingo Molnar8cb391e2008-12-14 12:22:31 +01002892 s64 delta;
Ingo Molnarbae43c92008-12-11 14:03:20 +01002893
Peter Zijlstraa39d6f22009-04-06 11:45:11 +02002894 prev = atomic64_xchg(&counter->hw.prev_count, now);
Ingo Molnar8cb391e2008-12-14 12:22:31 +01002895 delta = now - prev;
Ingo Molnar8cb391e2008-12-14 12:22:31 +01002896 atomic64_add(delta, &counter->count);
Ingo Molnarbae43c92008-12-11 14:03:20 +01002897}
2898
Ingo Molnar95cdd2e2008-12-21 13:50:42 +01002899static int task_clock_perf_counter_enable(struct perf_counter *counter)
Ingo Molnar8cb391e2008-12-14 12:22:31 +01002900{
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002901 struct hw_perf_counter *hwc = &counter->hw;
Peter Zijlstraa39d6f22009-04-06 11:45:11 +02002902 u64 now;
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002903
Peter Zijlstraa39d6f22009-04-06 11:45:11 +02002904 now = counter->ctx->time;
2905
2906 atomic64_set(&hwc->prev_count, now);
Peter Zijlstra039fc912009-03-13 16:43:47 +01002907 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2908 hwc->hrtimer.function = perf_swcounter_hrtimer;
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002909 if (hwc->irq_period) {
Peter Zijlstra60db5e02009-05-15 15:19:28 +02002910 u64 period = max_t(u64, 10000, hwc->irq_period);
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002911 __hrtimer_start_range_ns(&hwc->hrtimer,
Peter Zijlstra60db5e02009-05-15 15:19:28 +02002912 ns_to_ktime(period), 0,
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002913 HRTIMER_MODE_REL, 0);
2914 }
Ingo Molnar95cdd2e2008-12-21 13:50:42 +01002915
2916 return 0;
Ingo Molnar8cb391e2008-12-14 12:22:31 +01002917}
2918
2919static void task_clock_perf_counter_disable(struct perf_counter *counter)
2920{
Peter Zijlstrab986d7e2009-05-20 12:21:21 +02002921 if (counter->hw.irq_period)
2922 hrtimer_cancel(&counter->hw.hrtimer);
Peter Zijlstrae30e08f2009-04-08 15:01:25 +02002923 task_clock_perf_counter_update(counter, counter->ctx->time);
2924
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002925}
Ingo Molnaraa9c4c02008-12-17 14:10:57 +01002926
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01002927static void task_clock_perf_counter_read(struct perf_counter *counter)
2928{
Peter Zijlstrae30e08f2009-04-08 15:01:25 +02002929 u64 time;
2930
2931 if (!in_nmi()) {
2932 update_context_time(counter->ctx);
2933 time = counter->ctx->time;
2934 } else {
2935 u64 now = perf_clock();
2936 u64 delta = now - counter->ctx->timestamp;
2937 time = counter->ctx->time + delta;
2938 }
2939
2940 task_clock_perf_counter_update(counter, time);
Ingo Molnarbae43c92008-12-11 14:03:20 +01002941}
2942
Robert Richter4aeb0b42009-04-29 12:47:03 +02002943static const struct pmu perf_ops_task_clock = {
Ingo Molnar76715812008-12-17 14:20:28 +01002944 .enable = task_clock_perf_counter_enable,
2945 .disable = task_clock_perf_counter_disable,
2946 .read = task_clock_perf_counter_read,
Ingo Molnarbae43c92008-12-11 14:03:20 +01002947};
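/*
 * The read path above is split on purpose: outside NMI context the context
 * time is first brought fully up to date via update_context_time(), while
 * from NMI it is only estimated as ctx->time plus the raw perf_clock()
 * delta since ctx->timestamp -- presumably to avoid updating shared
 * context state from NMI context.
 */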
2948
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002949/*
Peter Zijlstra15dbf272009-03-13 12:21:32 +01002950 * Software counter: cpu migrations
2951 */
2952
Paul Mackerras23a185c2009-02-09 22:42:47 +11002953static inline u64 get_cpu_migrations(struct perf_counter *counter)
Ingo Molnar6c594c22008-12-14 12:34:15 +01002954{
Paul Mackerras23a185c2009-02-09 22:42:47 +11002955 struct task_struct *curr = counter->ctx->task;
2956
2957 if (curr)
2958 return curr->se.nr_migrations;
2959 return cpu_nr_migrations(smp_processor_id());
Ingo Molnar6c594c22008-12-14 12:34:15 +01002960}
2961
2962static void cpu_migrations_perf_counter_update(struct perf_counter *counter)
2963{
2964 u64 prev, now;
2965 s64 delta;
2966
2967 prev = atomic64_read(&counter->hw.prev_count);
Paul Mackerras23a185c2009-02-09 22:42:47 +11002968 now = get_cpu_migrations(counter);
Ingo Molnar6c594c22008-12-14 12:34:15 +01002969
2970 atomic64_set(&counter->hw.prev_count, now);
2971
2972 delta = now - prev;
Ingo Molnar6c594c22008-12-14 12:34:15 +01002973
2974 atomic64_add(delta, &counter->count);
2975}
2976
2977static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
2978{
2979 cpu_migrations_perf_counter_update(counter);
2980}
2981
Ingo Molnar95cdd2e2008-12-21 13:50:42 +01002982static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
Ingo Molnar6c594c22008-12-14 12:34:15 +01002983{
Paul Mackerrasc07c99b2009-02-13 22:10:34 +11002984 if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
2985 atomic64_set(&counter->hw.prev_count,
2986 get_cpu_migrations(counter));
Ingo Molnar95cdd2e2008-12-21 13:50:42 +01002987 return 0;
Ingo Molnar6c594c22008-12-14 12:34:15 +01002988}
2989
2990static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
2991{
2992 cpu_migrations_perf_counter_update(counter);
2993}
2994
Robert Richter4aeb0b42009-04-29 12:47:03 +02002995static const struct pmu perf_ops_cpu_migrations = {
Ingo Molnar76715812008-12-17 14:20:28 +01002996 .enable = cpu_migrations_perf_counter_enable,
2997 .disable = cpu_migrations_perf_counter_disable,
2998 .read = cpu_migrations_perf_counter_read,
Ingo Molnar6c594c22008-12-14 12:34:15 +01002999};
3000
Peter Zijlstrae077df42009-03-19 20:26:17 +01003001#ifdef CONFIG_EVENT_PROFILE
3002void perf_tpcounter_event(int event_id)
3003{
Peter Zijlstrab8e83512009-03-19 20:26:18 +01003004 struct pt_regs *regs = get_irq_regs();
3005
3006 if (!regs)
3007 regs = task_pt_regs(current);
3008
Peter Zijlstra78f13e92009-04-08 15:01:33 +02003009 __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0);
Peter Zijlstrae077df42009-03-19 20:26:17 +01003010}
Steven Whitehouseff7b1b42009-04-15 16:55:05 +01003011EXPORT_SYMBOL_GPL(perf_tpcounter_event);
Peter Zijlstrae077df42009-03-19 20:26:17 +01003012
3013extern int ftrace_profile_enable(int);
3014extern void ftrace_profile_disable(int);
3015
3016static void tp_perf_counter_destroy(struct perf_counter *counter)
3017{
Peter Zijlstraf4a2deb2009-03-23 18:22:06 +01003018 ftrace_profile_disable(perf_event_id(&counter->hw_event));
Peter Zijlstrae077df42009-03-19 20:26:17 +01003019}
3020
Robert Richter4aeb0b42009-04-29 12:47:03 +02003021static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
Peter Zijlstrae077df42009-03-19 20:26:17 +01003022{
Peter Zijlstraf4a2deb2009-03-23 18:22:06 +01003023 int event_id = perf_event_id(&counter->hw_event);
Peter Zijlstrae077df42009-03-19 20:26:17 +01003024 int ret;
3025
3026 ret = ftrace_profile_enable(event_id);
3027 if (ret)
3028 return NULL;
3029
3030 counter->destroy = tp_perf_counter_destroy;
Peter Zijlstrab8e83512009-03-19 20:26:18 +01003031 counter->hw.irq_period = counter->hw_event.irq_period;
Peter Zijlstrae077df42009-03-19 20:26:17 +01003032
3033 return &perf_ops_generic;
3034}
3035#else
Robert Richter4aeb0b42009-04-29 12:47:03 +02003036static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
Peter Zijlstrae077df42009-03-19 20:26:17 +01003037{
3038 return NULL;
3039}
3040#endif
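/*
 * The tracepoint glue is deliberately thin: ftrace_profile_enable() is
 * expected to arrange for perf_tpcounter_event() to be called whenever the
 * tracepoint with that event id fires, and from there the event goes
 * through the generic software counter path (__perf_swcounter_event() with
 * PERF_TYPE_TRACEPOINT), which is why tracepoint counters simply reuse
 * perf_ops_generic.
 */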
3041
Robert Richter4aeb0b42009-04-29 12:47:03 +02003042static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
Ingo Molnar5c92d122008-12-11 13:21:10 +01003043{
Robert Richter4aeb0b42009-04-29 12:47:03 +02003044 const struct pmu *pmu = NULL;
Ingo Molnar5c92d122008-12-11 13:21:10 +01003045
Paul Mackerras0475f9e2009-02-11 14:35:35 +11003046 /*
3047 * Software counters (currently) can't in general distinguish
3048 * between user, kernel and hypervisor events.
3049 * However, context switches and cpu migrations are considered
3050 * to be kernel events, and page faults are never hypervisor
3051 * events.
3052 */
Peter Zijlstraf4a2deb2009-03-23 18:22:06 +01003053 switch (perf_event_id(&counter->hw_event)) {
Ingo Molnar5c92d122008-12-11 13:21:10 +01003054 case PERF_COUNT_CPU_CLOCK:
Robert Richter4aeb0b42009-04-29 12:47:03 +02003055 pmu = &perf_ops_cpu_clock;
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01003056
Ingo Molnar5c92d122008-12-11 13:21:10 +01003057 break;
Ingo Molnarbae43c92008-12-11 14:03:20 +01003058 case PERF_COUNT_TASK_CLOCK:
Paul Mackerras23a185c2009-02-09 22:42:47 +11003059 /*
3060 * If the user instantiates this as a per-cpu counter,
3061 * use the cpu_clock counter instead.
3062 */
3063 if (counter->ctx->task)
Robert Richter4aeb0b42009-04-29 12:47:03 +02003064 pmu = &perf_ops_task_clock;
Paul Mackerras23a185c2009-02-09 22:42:47 +11003065 else
Robert Richter4aeb0b42009-04-29 12:47:03 +02003066 pmu = &perf_ops_cpu_clock;
Peter Zijlstrad6d020e2009-03-13 12:21:35 +01003067
Ingo Molnarbae43c92008-12-11 14:03:20 +01003068 break;
Ingo Molnare06c61a2008-12-14 14:44:31 +01003069 case PERF_COUNT_PAGE_FAULTS:
Peter Zijlstraac17dc82009-03-13 12:21:34 +01003070 case PERF_COUNT_PAGE_FAULTS_MIN:
3071 case PERF_COUNT_PAGE_FAULTS_MAJ:
Ingo Molnar5d6a27d2008-12-14 12:28:33 +01003072 case PERF_COUNT_CONTEXT_SWITCHES:
Robert Richter4aeb0b42009-04-29 12:47:03 +02003073 pmu = &perf_ops_generic;
Ingo Molnar5d6a27d2008-12-14 12:28:33 +01003074 break;
Ingo Molnar6c594c22008-12-14 12:34:15 +01003075 case PERF_COUNT_CPU_MIGRATIONS:
Paul Mackerras0475f9e2009-02-11 14:35:35 +11003076 if (!counter->hw_event.exclude_kernel)
Robert Richter4aeb0b42009-04-29 12:47:03 +02003077 pmu = &perf_ops_cpu_migrations;
Ingo Molnar6c594c22008-12-14 12:34:15 +01003078 break;
Ingo Molnar5c92d122008-12-11 13:21:10 +01003079 }
Peter Zijlstra15dbf272009-03-13 12:21:32 +01003080
Robert Richter4aeb0b42009-04-29 12:47:03 +02003081 return pmu;
Ingo Molnar5c92d122008-12-11 13:21:10 +01003082}
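/*
 * Summary of the mapping above: the clock events get the dedicated
 * hrtimer-based pmus (a per-cpu "task clock" silently becomes a cpu
 * clock), page faults and context switches only need perf_ops_generic
 * because the actual counting is driven by perf_swcounter_event() callers
 * elsewhere, and cpu migrations are refused (pmu stays NULL, so
 * perf_counter_alloc() returns -EINVAL) when the counter excludes kernel
 * events, since migrations count as kernel events.
 */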
3083
Thomas Gleixner0793a612008-12-04 20:12:29 +01003084/*
3085 * Allocate and initialize a counter structure
3086 */
3087static struct perf_counter *
Ingo Molnar04289bb2008-12-11 08:38:42 +01003088perf_counter_alloc(struct perf_counter_hw_event *hw_event,
3089 int cpu,
Paul Mackerras23a185c2009-02-09 22:42:47 +11003090 struct perf_counter_context *ctx,
Ingo Molnar9b51f662008-12-12 13:49:45 +01003091 struct perf_counter *group_leader,
3092 gfp_t gfpflags)
Thomas Gleixner0793a612008-12-04 20:12:29 +01003093{
Robert Richter4aeb0b42009-04-29 12:47:03 +02003094 const struct pmu *pmu;
Ingo Molnar621a01e2008-12-11 12:46:46 +01003095 struct perf_counter *counter;
Peter Zijlstra60db5e02009-05-15 15:19:28 +02003096 struct hw_perf_counter *hwc;
Paul Mackerrasd5d2bc0d2009-03-30 19:07:08 +02003097 long err;
Thomas Gleixner0793a612008-12-04 20:12:29 +01003098
Ingo Molnar9b51f662008-12-12 13:49:45 +01003099 counter = kzalloc(sizeof(*counter), gfpflags);
Thomas Gleixner0793a612008-12-04 20:12:29 +01003100 if (!counter)
Paul Mackerrasd5d2bc0d2009-03-30 19:07:08 +02003101 return ERR_PTR(-ENOMEM);
Thomas Gleixner0793a612008-12-04 20:12:29 +01003102
Ingo Molnar04289bb2008-12-11 08:38:42 +01003103 /*
3104 * Single counters are their own group leaders, with an
3105 * empty sibling list:
3106 */
3107 if (!group_leader)
3108 group_leader = counter;
3109
Peter Zijlstrafccc7142009-05-23 18:28:56 +02003110 mutex_init(&counter->child_mutex);
3111 INIT_LIST_HEAD(&counter->child_list);
3112
Ingo Molnar04289bb2008-12-11 08:38:42 +01003113 INIT_LIST_HEAD(&counter->list_entry);
Peter Zijlstra592903c2009-03-13 12:21:36 +01003114 INIT_LIST_HEAD(&counter->event_entry);
Ingo Molnar04289bb2008-12-11 08:38:42 +01003115 INIT_LIST_HEAD(&counter->sibling_list);
Thomas Gleixner0793a612008-12-04 20:12:29 +01003116 init_waitqueue_head(&counter->waitq);
3117
Peter Zijlstra7b732a72009-03-23 18:22:10 +01003118 mutex_init(&counter->mmap_mutex);
3119
Ingo Molnar9f66a382008-12-10 12:33:23 +01003120 counter->cpu = cpu;
3121 counter->hw_event = *hw_event;
Ingo Molnar04289bb2008-12-11 08:38:42 +01003122 counter->group_leader = group_leader;
Robert Richter4aeb0b42009-04-29 12:47:03 +02003123 counter->pmu = NULL;
Paul Mackerras23a185c2009-02-09 22:42:47 +11003124 counter->ctx = ctx;
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10003125 get_ctx(ctx);
Ingo Molnar621a01e2008-12-11 12:46:46 +01003126
Ingo Molnar235c7fc2008-12-21 14:43:25 +01003127 counter->state = PERF_COUNTER_STATE_INACTIVE;
Ingo Molnara86ed502008-12-17 00:43:10 +01003128 if (hw_event->disabled)
3129 counter->state = PERF_COUNTER_STATE_OFF;
3130
Robert Richter4aeb0b42009-04-29 12:47:03 +02003131 pmu = NULL;
Peter Zijlstrab8e83512009-03-19 20:26:18 +01003132
Peter Zijlstra60db5e02009-05-15 15:19:28 +02003133 hwc = &counter->hw;
3134 if (hw_event->freq && hw_event->irq_freq)
Peter Zijlstra2e569d32009-05-15 15:37:47 +02003135 hwc->irq_period = div64_u64(TICK_NSEC, hw_event->irq_freq);
Peter Zijlstra60db5e02009-05-15 15:19:28 +02003136 else
3137 hwc->irq_period = hw_event->irq_period;
3138
Peter Zijlstra2023b352009-05-05 17:50:26 +02003139 /*
3140 * we currently do not support PERF_RECORD_GROUP on inherited counters
3141 */
3142 if (hw_event->inherit && (hw_event->record_type & PERF_RECORD_GROUP))
3143 goto done;
3144
Peter Zijlstraf4a2deb2009-03-23 18:22:06 +01003145 if (perf_event_raw(hw_event)) {
Robert Richter4aeb0b42009-04-29 12:47:03 +02003146 pmu = hw_perf_counter_init(counter);
Peter Zijlstraf4a2deb2009-03-23 18:22:06 +01003147 goto done;
3148 }
3149
3150 switch (perf_event_type(hw_event)) {
Peter Zijlstrab8e83512009-03-19 20:26:18 +01003151 case PERF_TYPE_HARDWARE:
Robert Richter4aeb0b42009-04-29 12:47:03 +02003152 pmu = hw_perf_counter_init(counter);
Peter Zijlstrab8e83512009-03-19 20:26:18 +01003153 break;
3154
3155 case PERF_TYPE_SOFTWARE:
Robert Richter4aeb0b42009-04-29 12:47:03 +02003156 pmu = sw_perf_counter_init(counter);
Peter Zijlstrab8e83512009-03-19 20:26:18 +01003157 break;
3158
3159 case PERF_TYPE_TRACEPOINT:
Robert Richter4aeb0b42009-04-29 12:47:03 +02003160 pmu = tp_perf_counter_init(counter);
Peter Zijlstrab8e83512009-03-19 20:26:18 +01003161 break;
3162 }
Peter Zijlstraf4a2deb2009-03-23 18:22:06 +01003163done:
Paul Mackerrasd5d2bc0d2009-03-30 19:07:08 +02003164 err = 0;
Robert Richter4aeb0b42009-04-29 12:47:03 +02003165 if (!pmu)
Paul Mackerrasd5d2bc0d2009-03-30 19:07:08 +02003166 err = -EINVAL;
Robert Richter4aeb0b42009-04-29 12:47:03 +02003167 else if (IS_ERR(pmu))
3168 err = PTR_ERR(pmu);
Paul Mackerrasd5d2bc0d2009-03-30 19:07:08 +02003169
3170 if (err) {
3171 kfree(counter);
3172 return ERR_PTR(err);
3173 }
3174
Robert Richter4aeb0b42009-04-29 12:47:03 +02003175 counter->pmu = pmu;
Thomas Gleixner0793a612008-12-04 20:12:29 +01003176
Peter Zijlstra7fc23a52009-05-08 18:52:21 +02003177 atomic_inc(&nr_counters);
Peter Zijlstra9ee318a2009-04-09 10:53:44 +02003178 if (counter->hw_event.mmap)
3179 atomic_inc(&nr_mmap_tracking);
3180 if (counter->hw_event.munmap)
3181 atomic_inc(&nr_munmap_tracking);
3182 if (counter->hw_event.comm)
3183 atomic_inc(&nr_comm_tracking);
3184
Thomas Gleixner0793a612008-12-04 20:12:29 +01003185 return counter;
3186}
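/*
 * For reference, the perf_counter_hw_event fields the allocation path
 * above actually consumes, and their effect here:
 *
 *        .disabled               start in PERF_COUNTER_STATE_OFF
 *        .freq + .irq_freq       initial irq_period = TICK_NSEC / irq_freq
 *        .irq_period             used as-is when .freq is not set
 *        .inherit + PERF_RECORD_GROUP in .record_type
 *                                rejected: pmu stays NULL -> -EINVAL
 *        raw bit / event type    picks hw_perf_counter_init(),
 *                                sw_perf_counter_init() or tp_perf_counter_init()
 *        .mmap/.munmap/.comm     bump the global nr_*_tracking counts
 */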
3187
3188/**
Paul Mackerras2743a5b2009-03-04 20:36:51 +11003189 * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
Ingo Molnar9f66a382008-12-10 12:33:23 +01003190 *
3191 * @hw_event_uptr: event type attributes for monitoring/sampling
Thomas Gleixner0793a612008-12-04 20:12:29 +01003192 * @pid: target pid
Ingo Molnar9f66a382008-12-10 12:33:23 +01003193 * @cpu: target cpu
3194 * @group_fd: group leader counter fd
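 * @flags: must currently be 0 (reserved for future use)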
Thomas Gleixner0793a612008-12-04 20:12:29 +01003195 */
Paul Mackerras2743a5b2009-03-04 20:36:51 +11003196SYSCALL_DEFINE5(perf_counter_open,
Paul Mackerrasf3dfd262009-02-26 22:43:46 +11003197 const struct perf_counter_hw_event __user *, hw_event_uptr,
Paul Mackerras2743a5b2009-03-04 20:36:51 +11003198 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
Thomas Gleixner0793a612008-12-04 20:12:29 +01003199{
Ingo Molnar04289bb2008-12-11 08:38:42 +01003200 struct perf_counter *counter, *group_leader;
Ingo Molnar9f66a382008-12-10 12:33:23 +01003201 struct perf_counter_hw_event hw_event;
Ingo Molnar04289bb2008-12-11 08:38:42 +01003202 struct perf_counter_context *ctx;
Ingo Molnar9b51f662008-12-12 13:49:45 +01003203 struct file *counter_file = NULL;
Ingo Molnar04289bb2008-12-11 08:38:42 +01003204 struct file *group_file = NULL;
3205 int fput_needed = 0;
Ingo Molnar9b51f662008-12-12 13:49:45 +01003206 int fput_needed2 = 0;
Thomas Gleixner0793a612008-12-04 20:12:29 +01003207 int ret;
3208
Paul Mackerras2743a5b2009-03-04 20:36:51 +11003209 /* for future expandability... */
3210 if (flags)
3211 return -EINVAL;
3212
Ingo Molnar9f66a382008-12-10 12:33:23 +01003213 if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
Thomas Gleixnereab656a2008-12-08 19:26:59 +01003214 return -EFAULT;
3215
Ingo Molnar04289bb2008-12-11 08:38:42 +01003216 /*
Ingo Molnarccff2862008-12-11 11:26:29 +01003217 * Get the target context (task or percpu):
3218 */
3219 ctx = find_get_context(pid, cpu);
3220 if (IS_ERR(ctx))
3221 return PTR_ERR(ctx);
3222
3223 /*
3224 * Look up the group leader (we will attach this counter to it):
Ingo Molnar04289bb2008-12-11 08:38:42 +01003225 */
3226 group_leader = NULL;
3227 if (group_fd != -1) {
3228 ret = -EINVAL;
3229 group_file = fget_light(group_fd, &fput_needed);
3230 if (!group_file)
Ingo Molnarccff2862008-12-11 11:26:29 +01003231 goto err_put_context;
Ingo Molnar04289bb2008-12-11 08:38:42 +01003232 if (group_file->f_op != &perf_fops)
Ingo Molnarccff2862008-12-11 11:26:29 +01003233 goto err_put_context;
Ingo Molnar04289bb2008-12-11 08:38:42 +01003234
3235 group_leader = group_file->private_data;
3236 /*
Ingo Molnarccff2862008-12-11 11:26:29 +01003237 * Do not allow a recursive hierarchy (this new sibling
3238 * becoming part of another group-sibling):
Ingo Molnar04289bb2008-12-11 08:38:42 +01003239 */
Ingo Molnarccff2862008-12-11 11:26:29 +01003240 if (group_leader->group_leader != group_leader)
3241 goto err_put_context;
3242 /*
3243 * Do not allow attaching to a group in a different
3244 * task or CPU context:
3245 */
3246 if (group_leader->ctx != ctx)
3247 goto err_put_context;
Paul Mackerras3b6f9e52009-01-14 21:00:30 +11003248 /*
3249 * Only a group leader can be exclusive or pinned
3250 */
3251 if (hw_event.exclusive || hw_event.pinned)
3252 goto err_put_context;
Ingo Molnar04289bb2008-12-11 08:38:42 +01003253 }
3254
Paul Mackerras23a185c2009-02-09 22:42:47 +11003255 counter = perf_counter_alloc(&hw_event, cpu, ctx, group_leader,
3256 GFP_KERNEL);
Paul Mackerrasd5d2bc0d2009-03-30 19:07:08 +02003257 ret = PTR_ERR(counter);
3258 if (IS_ERR(counter))
Thomas Gleixner0793a612008-12-04 20:12:29 +01003259 goto err_put_context;
3260
Thomas Gleixner0793a612008-12-04 20:12:29 +01003261 ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
3262 if (ret < 0)
Ingo Molnar9b51f662008-12-12 13:49:45 +01003263 goto err_free_put_context;
3264
3265 counter_file = fget_light(ret, &fput_needed2);
3266 if (!counter_file)
3267 goto err_free_put_context;
3268
3269 counter->filp = counter_file;
Paul Mackerrasd859e292009-01-17 18:10:22 +11003270 mutex_lock(&ctx->mutex);
Ingo Molnar9b51f662008-12-12 13:49:45 +01003271 perf_install_in_context(ctx, counter, cpu);
Paul Mackerrasd859e292009-01-17 18:10:22 +11003272 mutex_unlock(&ctx->mutex);
Ingo Molnar9b51f662008-12-12 13:49:45 +01003273
Peter Zijlstra082ff5a2009-05-23 18:29:00 +02003274 counter->owner = current;
3275 get_task_struct(current);
3276 mutex_lock(&current->perf_counter_mutex);
3277 list_add_tail(&counter->owner_entry, &current->perf_counter_list);
3278 mutex_unlock(&current->perf_counter_mutex);
3279
Ingo Molnar9b51f662008-12-12 13:49:45 +01003280 fput_light(counter_file, fput_needed2);
Thomas Gleixner0793a612008-12-04 20:12:29 +01003281
Ingo Molnar04289bb2008-12-11 08:38:42 +01003282out_fput:
3283 fput_light(group_file, fput_needed);
3284
Thomas Gleixner0793a612008-12-04 20:12:29 +01003285 return ret;
3286
Ingo Molnar9b51f662008-12-12 13:49:45 +01003287err_free_put_context:
Thomas Gleixner0793a612008-12-04 20:12:29 +01003288 kfree(counter);
3289
3290err_put_context:
3291 put_context(ctx);
3292
Ingo Molnar04289bb2008-12-11 08:38:42 +01003293 goto out_fput;
Thomas Gleixner0793a612008-12-04 20:12:29 +01003294}
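/*
 * User-space side, as an illustrative sketch only (the event encoding in
 * struct perf_counter_hw_event and the syscall number come from the
 * exported headers and are not repeated here):
 *
 *        struct perf_counter_hw_event hw_event;
 *        u64 count;
 *        int fd;
 *
 *        memset(&hw_event, 0, sizeof(hw_event));
 *        // ... fill in the event selection fields from the header ...
 *
 *        fd = syscall(__NR_perf_counter_open, &hw_event,
 *                     pid,      // target task, as documented above
 *                     cpu,      // target cpu, as documented above
 *                     -1,       // group_fd: -1 creates a new group leader
 *                     0);       // flags: must currently be 0
 *        read(fd, &count, sizeof(count));    // read the current counter value
 *
 * The returned fd is an anon-inode file descriptor; it is also what later
 * counters pass as group_fd to join this group, subject to the checks
 * above (same context, the leader really is a leader, and exclusive/pinned
 * only allowed on the leader itself).
 */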
3295
Ingo Molnar9b51f662008-12-12 13:49:45 +01003296/*
Ingo Molnar9b51f662008-12-12 13:49:45 +01003297 * inherit a counter from parent task to child task:
3298 */
Paul Mackerrasd859e292009-01-17 18:10:22 +11003299static struct perf_counter *
Ingo Molnar9b51f662008-12-12 13:49:45 +01003300inherit_counter(struct perf_counter *parent_counter,
3301 struct task_struct *parent,
3302 struct perf_counter_context *parent_ctx,
3303 struct task_struct *child,
Paul Mackerrasd859e292009-01-17 18:10:22 +11003304 struct perf_counter *group_leader,
Ingo Molnar9b51f662008-12-12 13:49:45 +01003305 struct perf_counter_context *child_ctx)
3306{
3307 struct perf_counter *child_counter;
3308
Paul Mackerrasd859e292009-01-17 18:10:22 +11003309 /*
3310 * Instead of creating recursive hierarchies of counters,
3311 * we link inherited counters back to the original parent,
3312 * which is guaranteed to have a filp whose f_count we use as
3313 * the reference count:
3314 */
3315 if (parent_counter->parent)
3316 parent_counter = parent_counter->parent;
3317
Ingo Molnar9b51f662008-12-12 13:49:45 +01003318 child_counter = perf_counter_alloc(&parent_counter->hw_event,
Paul Mackerras23a185c2009-02-09 22:42:47 +11003319 parent_counter->cpu, child_ctx,
3320 group_leader, GFP_KERNEL);
Paul Mackerrasd5d2bc0d2009-03-30 19:07:08 +02003321 if (IS_ERR(child_counter))
3322 return child_counter;
Ingo Molnar9b51f662008-12-12 13:49:45 +01003323
3324 /*
Paul Mackerras564c2b22009-05-22 14:27:22 +10003325 * Make the child state follow the state of the parent counter,
3326 * not its hw_event.disabled bit. We hold the parent's mutex,
3327 * so we won't race with perf_counter_{en,dis}able_family.
3328 */
3329 if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
3330 child_counter->state = PERF_COUNTER_STATE_INACTIVE;
3331 else
3332 child_counter->state = PERF_COUNTER_STATE_OFF;
3333
3334 /*
Ingo Molnar9b51f662008-12-12 13:49:45 +01003335 * Link it up in the child's context:
3336 */
Paul Mackerras53cfbf52009-03-25 22:46:58 +11003337 add_counter_to_ctx(child_counter, child_ctx);
Ingo Molnar9b51f662008-12-12 13:49:45 +01003338
3339 child_counter->parent = parent_counter;
Ingo Molnar9b51f662008-12-12 13:49:45 +01003340 /*
3341 * inherit into child's child as well:
3342 */
3343 child_counter->hw_event.inherit = 1;
3344
3345 /*
3346 * Get a reference to the parent filp - we will fput it
3347 * when the child counter exits. This is safe to do because
3348 * we are in the parent and we know that the filp still
3349 * exists and has a nonzero count:
3350 */
3351 atomic_long_inc(&parent_counter->filp->f_count);
3352
Paul Mackerrasd859e292009-01-17 18:10:22 +11003353 /*
3354 * Link this into the parent counter's child list
3355 */
Peter Zijlstrafccc7142009-05-23 18:28:56 +02003356 mutex_lock(&parent_counter->child_mutex);
Paul Mackerrasd859e292009-01-17 18:10:22 +11003357 list_add_tail(&child_counter->child_list, &parent_counter->child_list);
Peter Zijlstrafccc7142009-05-23 18:28:56 +02003358 mutex_unlock(&parent_counter->child_mutex);
Paul Mackerrasd859e292009-01-17 18:10:22 +11003359
3360 return child_counter;
3361}
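/*
 * In other words, inheritance stays flat: no matter how deep the fork
 * chain, every inherited counter hangs directly off the one counter user
 * space opened (the only one guaranteed to have a filp), and the f_count
 * reference taken above is dropped again in sync_child_counter() once the
 * child counter is torn down.
 */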
3362
3363static int inherit_group(struct perf_counter *parent_counter,
3364 struct task_struct *parent,
3365 struct perf_counter_context *parent_ctx,
3366 struct task_struct *child,
3367 struct perf_counter_context *child_ctx)
3368{
3369 struct perf_counter *leader;
3370 struct perf_counter *sub;
Paul Mackerrasd5d2bc0d2009-03-30 19:07:08 +02003371 struct perf_counter *child_ctr;
Paul Mackerrasd859e292009-01-17 18:10:22 +11003372
3373 leader = inherit_counter(parent_counter, parent, parent_ctx,
3374 child, NULL, child_ctx);
Paul Mackerrasd5d2bc0d2009-03-30 19:07:08 +02003375 if (IS_ERR(leader))
3376 return PTR_ERR(leader);
Paul Mackerrasd859e292009-01-17 18:10:22 +11003377 list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) {
Paul Mackerrasd5d2bc0d2009-03-30 19:07:08 +02003378 child_ctr = inherit_counter(sub, parent, parent_ctx,
3379 child, leader, child_ctx);
3380 if (IS_ERR(child_ctr))
3381 return PTR_ERR(child_ctr);
Paul Mackerrasd859e292009-01-17 18:10:22 +11003382 }
Ingo Molnar9b51f662008-12-12 13:49:45 +01003383 return 0;
3384}
3385
Paul Mackerrasd859e292009-01-17 18:10:22 +11003386static void sync_child_counter(struct perf_counter *child_counter,
3387 struct perf_counter *parent_counter)
3388{
Peter Zijlstra8bc20952009-05-15 20:45:59 +02003389 u64 child_val;
Paul Mackerrasd859e292009-01-17 18:10:22 +11003390
Paul Mackerrasd859e292009-01-17 18:10:22 +11003391 child_val = atomic64_read(&child_counter->count);
3392
3393 /*
3394 * Add back the child's count to the parent's count:
3395 */
3396 atomic64_add(child_val, &parent_counter->count);
Paul Mackerras53cfbf52009-03-25 22:46:58 +11003397 atomic64_add(child_counter->total_time_enabled,
3398 &parent_counter->child_total_time_enabled);
3399 atomic64_add(child_counter->total_time_running,
3400 &parent_counter->child_total_time_running);
Paul Mackerrasd859e292009-01-17 18:10:22 +11003401
3402 /*
3403 * Remove this counter from the parent's list
3404 */
Peter Zijlstrafccc7142009-05-23 18:28:56 +02003405 mutex_lock(&parent_counter->child_mutex);
Paul Mackerrasd859e292009-01-17 18:10:22 +11003406 list_del_init(&child_counter->child_list);
Peter Zijlstrafccc7142009-05-23 18:28:56 +02003407 mutex_unlock(&parent_counter->child_mutex);
Paul Mackerrasd859e292009-01-17 18:10:22 +11003408
3409 /*
3410 * Release the parent counter, if this was the last
3411 * reference to it.
3412 */
3413 fput(parent_counter->filp);
3414}
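/*
 * Net effect: once a child exits, its final count and time totals live on
 * in the parent's count and child_total_time_* fields, so whoever reads
 * the parent counter sees the sum over all exited children, and the filp
 * reference taken in inherit_counter() goes away with the fput() above.
 */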
3415
Ingo Molnar9b51f662008-12-12 13:49:45 +01003416static void
3417__perf_counter_exit_task(struct task_struct *child,
3418 struct perf_counter *child_counter,
3419 struct perf_counter_context *child_ctx)
3420{
3421 struct perf_counter *parent_counter;
Ingo Molnar9b51f662008-12-12 13:49:45 +01003422
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10003423 update_counter_times(child_counter);
Peter Zijlstraaa9c67f2009-05-23 18:28:59 +02003424 perf_counter_remove_from_context(child_counter);
Ingo Molnar0cc0c022008-12-14 23:20:36 +01003425
Ingo Molnar9b51f662008-12-12 13:49:45 +01003426 parent_counter = child_counter->parent;
3427 /*
3428 * It can happen that the parent exits first, and has counters
3429 * that are still around due to the child reference. These
3430 * counters need to be zapped - but otherwise linger.
3431 */
Paul Mackerrasd859e292009-01-17 18:10:22 +11003432 if (parent_counter) {
3433 sync_child_counter(child_counter, parent_counter);
Peter Zijlstraf1600952009-03-19 20:26:16 +01003434 free_counter(child_counter);
Paul Mackerras4bcf3492009-02-11 13:53:19 +01003435 }
Ingo Molnar9b51f662008-12-12 13:49:45 +01003436}
3437
3438/*
Paul Mackerrasd859e292009-01-17 18:10:22 +11003439 * When a child task exits, feed back counter values to parent counters.
Ingo Molnar9b51f662008-12-12 13:49:45 +01003440 *
Paul Mackerrasd859e292009-01-17 18:10:22 +11003441 * Note: we may be running in child context, but the PID is not hashed
Ingo Molnar9b51f662008-12-12 13:49:45 +01003442 * anymore so new counters will not be added.
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10003443 * (XXX not sure that is true when we get called from flush_old_exec.
3444 * -- paulus)
Ingo Molnar9b51f662008-12-12 13:49:45 +01003445 */
3446void perf_counter_exit_task(struct task_struct *child)
3447{
3448 struct perf_counter *child_counter, *tmp;
3449 struct perf_counter_context *child_ctx;
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10003450 unsigned long flags;
Ingo Molnar9b51f662008-12-12 13:49:45 +01003451
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10003452 child_ctx = child->perf_counter_ctxp;
Ingo Molnar9b51f662008-12-12 13:49:45 +01003453
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10003454 if (likely(!child_ctx))
Ingo Molnar9b51f662008-12-12 13:49:45 +01003455 return;
3456
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10003457 local_irq_save(flags);
3458 __perf_counter_task_sched_out(child_ctx);
3459 child->perf_counter_ctxp = NULL;
3460 local_irq_restore(flags);
3461
3462 mutex_lock(&child_ctx->mutex);
3463
Peter Zijlstra8bc20952009-05-15 20:45:59 +02003464again:
Ingo Molnar9b51f662008-12-12 13:49:45 +01003465 list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
3466 list_entry)
3467 __perf_counter_exit_task(child, child_counter, child_ctx);
Peter Zijlstra8bc20952009-05-15 20:45:59 +02003468
3469 /*
3470 * If the last counter was a group counter, it will have appended all
3471 * its siblings to the list, but we obtained 'tmp' before that, so it
3472 * will still point to the list head terminating the iteration.
3473 */
3474 if (!list_empty(&child_ctx->counter_list))
3475 goto again;
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10003476
3477 mutex_unlock(&child_ctx->mutex);
3478
3479 put_ctx(child_ctx);
Ingo Molnar9b51f662008-12-12 13:49:45 +01003480}
3481
3482/*
3483 * Initialize the perf_counter context in task_struct
3484 */
Peter Zijlstra6ab423e2009-05-25 14:45:27 +02003485int perf_counter_init_task(struct task_struct *child)
Ingo Molnar9b51f662008-12-12 13:49:45 +01003486{
3487 struct perf_counter_context *child_ctx, *parent_ctx;
Paul Mackerrasd859e292009-01-17 18:10:22 +11003488 struct perf_counter *counter;
Ingo Molnar9b51f662008-12-12 13:49:45 +01003489 struct task_struct *parent = current;
Paul Mackerras564c2b22009-05-22 14:27:22 +10003490 int inherited_all = 1;
Peter Zijlstra6ab423e2009-05-25 14:45:27 +02003491 int ret = 0;
Ingo Molnar9b51f662008-12-12 13:49:45 +01003492
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10003493 child->perf_counter_ctxp = NULL;
Ingo Molnar9b51f662008-12-12 13:49:45 +01003494
Peter Zijlstra082ff5a2009-05-23 18:29:00 +02003495 mutex_init(&child->perf_counter_mutex);
3496 INIT_LIST_HEAD(&child->perf_counter_list);
3497
Peter Zijlstra6ab423e2009-05-25 14:45:27 +02003498 parent_ctx = parent->perf_counter_ctxp;
3499 if (likely(!parent_ctx || !parent_ctx->nr_counters))
3500 return 0;
3501
Ingo Molnar9b51f662008-12-12 13:49:45 +01003502 /*
3503 * This is executed from the parent task context, so inherit
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10003504 * counters that have been marked for cloning.
3505 * First allocate and initialize a context for the child.
Ingo Molnar9b51f662008-12-12 13:49:45 +01003506 */
3507
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10003508 child_ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
3509 if (!child_ctx)
Peter Zijlstra6ab423e2009-05-25 14:45:27 +02003510 return -ENOMEM;
Paul Mackerrasa63eaf32009-05-22 14:17:31 +10003511
3512 __perf_counter_init_context(child_ctx, child);
3513 child->perf_counter_ctxp = child_ctx;
3514
Ingo Molnar9b51f662008-12-12 13:49:45 +01003515 /*
3516 * Lock the parent list. No need to lock the child - not PID
3517 * hashed yet and not running, so nobody can access it.
3518 */
Paul Mackerrasd859e292009-01-17 18:10:22 +11003519 mutex_lock(&parent_ctx->mutex);
Ingo Molnar9b51f662008-12-12 13:49:45 +01003520
3521 /*
3522 * We don't have to disable NMIs - we are only looking at
3523 * the list, not manipulating it:
3524 */
Peter Zijlstrad7b629a2009-05-20 12:21:19 +02003525 list_for_each_entry_rcu(counter, &parent_ctx->event_list, event_entry) {
3526 if (counter != counter->group_leader)
3527 continue;
3528
Paul Mackerras564c2b22009-05-22 14:27:22 +10003529 if (!counter->hw_event.inherit) {
3530 inherited_all = 0;
Ingo Molnar9b51f662008-12-12 13:49:45 +01003531 continue;
Paul Mackerras564c2b22009-05-22 14:27:22 +10003532 }
Ingo Molnar9b51f662008-12-12 13:49:45 +01003533
Peter Zijlstra6ab423e2009-05-25 14:45:27 +02003534 ret = inherit_group(counter, parent, parent_ctx,
3535 child, child_ctx);
3536 if (ret) {
Paul Mackerras564c2b22009-05-22 14:27:22 +10003537 inherited_all = 0;
Ingo Molnar9b51f662008-12-12 13:49:45 +01003538 break;
Paul Mackerras564c2b22009-05-22 14:27:22 +10003539 }
3540 }
3541
3542 if (inherited_all) {
3543 /*
3544 * Mark the child context as a clone of the parent
3545 * context, or of whatever the parent is a clone of.
3546 */
3547 if (parent_ctx->parent_ctx) {
3548 child_ctx->parent_ctx = parent_ctx->parent_ctx;
3549 child_ctx->parent_gen = parent_ctx->parent_gen;
3550 } else {
3551 child_ctx->parent_ctx = parent_ctx;
3552 child_ctx->parent_gen = parent_ctx->generation;
3553 }
3554 get_ctx(child_ctx->parent_ctx);
Ingo Molnar9b51f662008-12-12 13:49:45 +01003555 }
3556
Paul Mackerrasd859e292009-01-17 18:10:22 +11003557 mutex_unlock(&parent_ctx->mutex);
Peter Zijlstra6ab423e2009-05-25 14:45:27 +02003558
3559 return ret;
Ingo Molnar9b51f662008-12-12 13:49:45 +01003560}
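/*
 * The parent_ctx/parent_gen bookkeeping at the end is only done when every
 * inheritable counter was cloned successfully: the child context is then
 * marked as a clone of (ultimately) the original ancestor context and
 * stamped with the generation it was cloned at -- presumably so that other
 * code can cheaply tell whether two contexts are still equivalent clones
 * of each other.
 */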
3561
Ingo Molnar04289bb2008-12-11 08:38:42 +01003562static void __cpuinit perf_counter_init_cpu(int cpu)
Thomas Gleixner0793a612008-12-04 20:12:29 +01003563{
Ingo Molnar04289bb2008-12-11 08:38:42 +01003564 struct perf_cpu_context *cpuctx;
Thomas Gleixner0793a612008-12-04 20:12:29 +01003565
Ingo Molnar04289bb2008-12-11 08:38:42 +01003566 cpuctx = &per_cpu(perf_cpu_context, cpu);
3567 __perf_counter_init_context(&cpuctx->ctx, NULL);
Thomas Gleixner0793a612008-12-04 20:12:29 +01003568
Ingo Molnar1dce8d92009-05-04 19:23:18 +02003569 spin_lock(&perf_resource_lock);
Ingo Molnar04289bb2008-12-11 08:38:42 +01003570 cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu;
Ingo Molnar1dce8d92009-05-04 19:23:18 +02003571 spin_unlock(&perf_resource_lock);
Ingo Molnar04289bb2008-12-11 08:38:42 +01003572
Paul Mackerras01d02872009-01-14 13:44:19 +11003573 hw_perf_counter_setup(cpu);
Thomas Gleixner0793a612008-12-04 20:12:29 +01003574}
3575
3576#ifdef CONFIG_HOTPLUG_CPU
Ingo Molnar04289bb2008-12-11 08:38:42 +01003577static void __perf_counter_exit_cpu(void *info)
Thomas Gleixner0793a612008-12-04 20:12:29 +01003578{
3579 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
3580 struct perf_counter_context *ctx = &cpuctx->ctx;
3581 struct perf_counter *counter, *tmp;
3582
Ingo Molnar04289bb2008-12-11 08:38:42 +01003583 list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry)
3584 __perf_counter_remove_from_context(counter);
Thomas Gleixner0793a612008-12-04 20:12:29 +01003585}
Ingo Molnar04289bb2008-12-11 08:38:42 +01003586static void perf_counter_exit_cpu(int cpu)
Thomas Gleixner0793a612008-12-04 20:12:29 +01003587{
Paul Mackerrasd859e292009-01-17 18:10:22 +11003588 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
3589 struct perf_counter_context *ctx = &cpuctx->ctx;
3590
3591 mutex_lock(&ctx->mutex);
Ingo Molnar04289bb2008-12-11 08:38:42 +01003592 smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1);
Paul Mackerrasd859e292009-01-17 18:10:22 +11003593 mutex_unlock(&ctx->mutex);
Thomas Gleixner0793a612008-12-04 20:12:29 +01003594}
3595#else
Ingo Molnar04289bb2008-12-11 08:38:42 +01003596static inline void perf_counter_exit_cpu(int cpu) { }
Thomas Gleixner0793a612008-12-04 20:12:29 +01003597#endif
3598
3599static int __cpuinit
3600perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
3601{
3602 unsigned int cpu = (long)hcpu;
3603
3604 switch (action) {
3605
3606 case CPU_UP_PREPARE:
3607 case CPU_UP_PREPARE_FROZEN:
Ingo Molnar04289bb2008-12-11 08:38:42 +01003608 perf_counter_init_cpu(cpu);
Thomas Gleixner0793a612008-12-04 20:12:29 +01003609 break;
3610
3611 case CPU_DOWN_PREPARE:
3612 case CPU_DOWN_PREPARE_FROZEN:
Ingo Molnar04289bb2008-12-11 08:38:42 +01003613 perf_counter_exit_cpu(cpu);
Thomas Gleixner0793a612008-12-04 20:12:29 +01003614 break;
3615
3616 default:
3617 break;
3618 }
3619
3620 return NOTIFY_OK;
3621}
3622
3623static struct notifier_block __cpuinitdata perf_cpu_nb = {
3624 .notifier_call = perf_cpu_notify,
3625};
3626
Ingo Molnar0d905bc2009-05-04 19:13:30 +02003627void __init perf_counter_init(void)
Thomas Gleixner0793a612008-12-04 20:12:29 +01003628{
3629 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
3630 (void *)(long)smp_processor_id());
3631 register_cpu_notifier(&perf_cpu_nb);
Thomas Gleixner0793a612008-12-04 20:12:29 +01003632}
Thomas Gleixner0793a612008-12-04 20:12:29 +01003633
3634static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf)
3635{
3636 return sprintf(buf, "%d\n", perf_reserved_percpu);
3637}
3638
3639static ssize_t
3640perf_set_reserve_percpu(struct sysdev_class *class,
3641 const char *buf,
3642 size_t count)
3643{
3644 struct perf_cpu_context *cpuctx;
3645 unsigned long val;
3646 int err, cpu, mpt;
3647
3648 err = strict_strtoul(buf, 10, &val);
3649 if (err)
3650 return err;
3651 if (val > perf_max_counters)
3652 return -EINVAL;
3653
Ingo Molnar1dce8d92009-05-04 19:23:18 +02003654 spin_lock(&perf_resource_lock);
Thomas Gleixner0793a612008-12-04 20:12:29 +01003655 perf_reserved_percpu = val;
3656 for_each_online_cpu(cpu) {
3657 cpuctx = &per_cpu(perf_cpu_context, cpu);
3658 spin_lock_irq(&cpuctx->ctx.lock);
3659 mpt = min(perf_max_counters - cpuctx->ctx.nr_counters,
3660 perf_max_counters - perf_reserved_percpu);
3661 cpuctx->max_pertask = mpt;
3662 spin_unlock_irq(&cpuctx->ctx.lock);
3663 }
Ingo Molnar1dce8d92009-05-04 19:23:18 +02003664 spin_unlock(&perf_resource_lock);
Thomas Gleixner0793a612008-12-04 20:12:29 +01003665
3666 return count;
3667}
3668
3669static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf)
3670{
3671 return sprintf(buf, "%d\n", perf_overcommit);
3672}
3673
3674static ssize_t
3675perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count)
3676{
3677 unsigned long val;
3678 int err;
3679
3680 err = strict_strtoul(buf, 10, &val);
3681 if (err)
3682 return err;
3683 if (val > 1)
3684 return -EINVAL;
3685
Ingo Molnar1dce8d92009-05-04 19:23:18 +02003686 spin_lock(&perf_resource_lock);
Thomas Gleixner0793a612008-12-04 20:12:29 +01003687 perf_overcommit = val;
Ingo Molnar1dce8d92009-05-04 19:23:18 +02003688 spin_unlock(&perf_resource_lock);
Thomas Gleixner0793a612008-12-04 20:12:29 +01003689
3690 return count;
3691}
3692
3693static SYSDEV_CLASS_ATTR(
3694 reserve_percpu,
3695 0644,
3696 perf_show_reserve_percpu,
3697 perf_set_reserve_percpu
3698 );
3699
3700static SYSDEV_CLASS_ATTR(
3701 overcommit,
3702 0644,
3703 perf_show_overcommit,
3704 perf_set_overcommit
3705 );
3706
3707static struct attribute *perfclass_attrs[] = {
3708 &attr_reserve_percpu.attr,
3709 &attr_overcommit.attr,
3710 NULL
3711};
3712
3713static struct attribute_group perfclass_attr_group = {
3714 .attrs = perfclass_attrs,
3715 .name = "perf_counters",
3716};
3717
3718static int __init perf_counter_sysfs_init(void)
3719{
3720 return sysfs_create_group(&cpu_sysdev_class.kset.kobj,
3721 &perfclass_attr_group);
3722}
3723device_initcall(perf_counter_sysfs_init);
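/*
 * These attributes show up under the cpu sysdev class, i.e. on a typical
 * sysfs layout as /sys/devices/system/cpu/perf_counters/reserve_percpu and
 * /sys/devices/system/cpu/perf_counters/overcommit.  Illustrative use,
 * assuming that path:
 *
 *        # reserve 2 counters per cpu (subtracted from the per-task limit)
 *        echo 2 > /sys/devices/system/cpu/perf_counters/reserve_percpu
 *        # toggle the overcommit policy (only 0 or 1 are accepted)
 *        echo 0 > /sys/devices/system/cpu/perf_counters/overcommit
 *
 * Both writes are validated as above (reserve_percpu must not exceed
 * perf_max_counters) and are serialized by perf_resource_lock; the
 * reserve_percpu write also recomputes each cpu's max_pertask limit.
 */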