Blame - kernel/perf_counter.c - android_kernel_htc_msm8960

blob: ad62965828d3d4b715d5998c294e8f598fc2e3b0 [file] [log] [blame]

Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1	/*
				2	* Performance counter core code
				3	*
				4	* Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
				5	* Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
				6	*
				7	* For licencing details see kernel-base/COPYING
				8	*/
				9
				10	#include <linux/fs.h>
				11	#include <linux/cpu.h>
				12	#include <linux/smp.h>
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	13	#include <linux/file.h>
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	14	#include <linux/poll.h>
				15	#include <linux/sysfs.h>
				16	#include <linux/ptrace.h>
				17	#include <linux/percpu.h>
				18	#include <linux/uaccess.h>
				19	#include <linux/syscalls.h>
				20	#include <linux/anon_inodes.h>
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	21	#include <linux/kernel_stat.h>
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	22	#include <linux/perf_counter.h>
Paul Mackerras	23a185c	2009-02-09 22:42:47 +1100	[diff] [blame]	23	#include <linux/mm.h>
				24	#include <linux/vmstat.h>
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	25
				26	/*
				27	* Each CPU has a list of per CPU counters:
				28	*/
				29	DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
				30
Ingo Molnar	088e285	2008-12-14 20:21:00 +0100	[diff] [blame]	31	int perf_max_counters __read_mostly = 1;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	32	static int perf_reserved_percpu __read_mostly;
				33	static int perf_overcommit __read_mostly = 1;
				34
				35	/*
				36	* Mutex for (sysadmin-configurable) counter reservations:
				37	*/
				38	static DEFINE_MUTEX(perf_resource_mutex);
				39
				40	/*
				41	* Architecture provided APIs - weak aliases:
				42	*/
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	43	extern __weak const struct hw_perf_counter_ops *
Ingo Molnar	621a01e	2008-12-11 12:46:46 +0100	[diff] [blame]	44	hw_perf_counter_init(struct perf_counter *counter)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	45	{
Paul Mackerras	ff6f054	2009-01-09 16:19:25 +1100	[diff] [blame]	46	return NULL;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	47	}
				48
Ingo Molnar	01b2838	2008-12-11 13:45:51 +0100	[diff] [blame]	49	u64 __weak hw_perf_save_disable(void) { return 0; }
Yinghai Lu	01ea1cc	2008-12-26 21:05:06 -0800	[diff] [blame]	50	void __weak hw_perf_restore(u64 ctrl) { barrier(); }
Paul Mackerras	01d0287	2009-01-14 13:44:19 +1100	[diff] [blame]	51	void __weak hw_perf_counter_setup(int cpu) { barrier(); }
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	52	int __weak hw_perf_group_sched_in(struct perf_counter *group_leader,
				53	struct perf_cpu_context *cpuctx,
				54	struct perf_counter_context *ctx, int cpu)
				55	{
				56	return 0;
				57	}
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	58
Paul Mackerras	4eb96fc	2009-01-09 17:24:34 +1100	[diff] [blame]	59	void __weak perf_counter_print_debug(void) { }
				60
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	61	static void
				62	list_add_counter(struct perf_counter counter, struct perf_counter_context ctx)
				63	{
				64	struct perf_counter *group_leader = counter->group_leader;
				65
				66	/*
				67	* Depending on whether it is a standalone or sibling counter,
				68	* add it straight to the context's counter list, or to the group
				69	* leader's sibling list:
				70	*/
				71	if (counter->group_leader == counter)
				72	list_add_tail(&counter->list_entry, &ctx->counter_list);
				73	else
				74	list_add_tail(&counter->list_entry, &group_leader->sibling_list);
				75	}
				76
				77	static void
				78	list_del_counter(struct perf_counter counter, struct perf_counter_context ctx)
				79	{
				80	struct perf_counter sibling, tmp;
				81
				82	list_del_init(&counter->list_entry);
				83
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	84	/*
				85	* If this was a group counter with sibling counters then
				86	* upgrade the siblings to singleton counters by adding them
				87	* to the context list directly:
				88	*/
				89	list_for_each_entry_safe(sibling, tmp,
				90	&counter->sibling_list, list_entry) {
				91
				92	list_del_init(&sibling->list_entry);
				93	list_add_tail(&sibling->list_entry, &ctx->counter_list);
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	94	sibling->group_leader = sibling;
				95	}
				96	}
				97
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	98	static void
				99	counter_sched_out(struct perf_counter *counter,
				100	struct perf_cpu_context *cpuctx,
				101	struct perf_counter_context *ctx)
				102	{
				103	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
				104	return;
				105
				106	counter->state = PERF_COUNTER_STATE_INACTIVE;
				107	counter->hw_ops->disable(counter);
				108	counter->oncpu = -1;
				109
				110	if (!is_software_counter(counter))
				111	cpuctx->active_oncpu--;
				112	ctx->nr_active--;
				113	if (counter->hw_event.exclusive \|\| !cpuctx->active_oncpu)
				114	cpuctx->exclusive = 0;
				115	}
				116
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	117	static void
				118	group_sched_out(struct perf_counter *group_counter,
				119	struct perf_cpu_context *cpuctx,
				120	struct perf_counter_context *ctx)
				121	{
				122	struct perf_counter *counter;
				123
				124	if (group_counter->state != PERF_COUNTER_STATE_ACTIVE)
				125	return;
				126
				127	counter_sched_out(group_counter, cpuctx, ctx);
				128
				129	/*
				130	* Schedule out siblings (if any):
				131	*/
				132	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
				133	counter_sched_out(counter, cpuctx, ctx);
				134
				135	if (group_counter->hw_event.exclusive)
				136	cpuctx->exclusive = 0;
				137	}
				138
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	139	/*
				140	* Cross CPU call to remove a performance counter
				141	*
				142	* We disable the counter on the hardware level first. After that we
				143	* remove it from the context list.
				144	*/
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	145	static void __perf_counter_remove_from_context(void *info)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	146	{
				147	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
				148	struct perf_counter *counter = info;
				149	struct perf_counter_context *ctx = counter->ctx;
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	150	unsigned long flags;
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	151	u64 perf_flags;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	152
				153	/*
				154	* If this is a task context, we need to check whether it is
				155	* the current task context of this cpu. If not it has been
				156	* scheduled out before the smp call arrived.
				157	*/
				158	if (ctx->task && cpuctx->task_ctx != ctx)
				159	return;
				160
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	161	curr_rq_lock_irq_save(&flags);
				162	spin_lock(&ctx->lock);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	163
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	164	counter_sched_out(counter, cpuctx, ctx);
				165
				166	counter->task = NULL;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	167	ctx->nr_counters--;
				168
				169	/*
				170	* Protect the list operation against NMI by disabling the
				171	* counters on a global level. NOP for non NMI based counters.
				172	*/
Ingo Molnar	01b2838	2008-12-11 13:45:51 +0100	[diff] [blame]	173	perf_flags = hw_perf_save_disable();
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	174	list_del_counter(counter, ctx);
Ingo Molnar	01b2838	2008-12-11 13:45:51 +0100	[diff] [blame]	175	hw_perf_restore(perf_flags);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	176
				177	if (!ctx->task) {
				178	/*
				179	* Allow more per task counters with respect to the
				180	* reservation:
				181	*/
				182	cpuctx->max_pertask =
				183	min(perf_max_counters - ctx->nr_counters,
				184	perf_max_counters - perf_reserved_percpu);
				185	}
				186
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	187	spin_unlock(&ctx->lock);
				188	curr_rq_unlock_irq_restore(&flags);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	189	}
				190
				191
				192	/*
				193	* Remove the counter from a task's (or a CPU's) list of counters.
				194	*
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	195	* Must be called with counter->mutex and ctx->mutex held.
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	196	*
				197	* CPU counters are removed with a smp call. For task counters we only
				198	* call when the task is on a CPU.
				199	*/
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	200	static void perf_counter_remove_from_context(struct perf_counter *counter)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	201	{
				202	struct perf_counter_context *ctx = counter->ctx;
				203	struct task_struct *task = ctx->task;
				204
				205	if (!task) {
				206	/*
				207	* Per cpu counters are removed via an smp call and
				208	* the removal is always sucessful.
				209	*/
				210	smp_call_function_single(counter->cpu,
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	211	__perf_counter_remove_from_context,
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	212	counter, 1);
				213	return;
				214	}
				215
				216	retry:
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	217	task_oncpu_function_call(task, __perf_counter_remove_from_context,
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	218	counter);
				219
				220	spin_lock_irq(&ctx->lock);
				221	/*
				222	* If the context is active we need to retry the smp call.
				223	*/
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	224	if (ctx->nr_active && !list_empty(&counter->list_entry)) {
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	225	spin_unlock_irq(&ctx->lock);
				226	goto retry;
				227	}
				228
				229	/*
				230	* The lock prevents that this context is scheduled in so we
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	231	* can remove the counter safely, if the call above did not
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	232	* succeed.
				233	*/
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	234	if (!list_empty(&counter->list_entry)) {
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	235	ctx->nr_counters--;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	236	list_del_counter(counter, ctx);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	237	counter->task = NULL;
				238	}
				239	spin_unlock_irq(&ctx->lock);
				240	}
				241
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	242	/*
				243	* Cross CPU call to disable a performance counter
				244	*/
				245	static void __perf_counter_disable(void *info)
				246	{
				247	struct perf_counter *counter = info;
				248	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
				249	struct perf_counter_context *ctx = counter->ctx;
				250	unsigned long flags;
				251
				252	/*
				253	* If this is a per-task counter, need to check whether this
				254	* counter's task is the current task on this cpu.
				255	*/
				256	if (ctx->task && cpuctx->task_ctx != ctx)
				257	return;
				258
				259	curr_rq_lock_irq_save(&flags);
				260	spin_lock(&ctx->lock);
				261
				262	/*
				263	* If the counter is on, turn it off.
				264	* If it is in error state, leave it in error state.
				265	*/
				266	if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
				267	if (counter == counter->group_leader)
				268	group_sched_out(counter, cpuctx, ctx);
				269	else
				270	counter_sched_out(counter, cpuctx, ctx);
				271	counter->state = PERF_COUNTER_STATE_OFF;
				272	}
				273
				274	spin_unlock(&ctx->lock);
				275	curr_rq_unlock_irq_restore(&flags);
				276	}
				277
				278	/*
				279	* Disable a counter.
				280	*/
				281	static void perf_counter_disable(struct perf_counter *counter)
				282	{
				283	struct perf_counter_context *ctx = counter->ctx;
				284	struct task_struct *task = ctx->task;
				285
				286	if (!task) {
				287	/*
				288	* Disable the counter on the cpu that it's on
				289	*/
				290	smp_call_function_single(counter->cpu, __perf_counter_disable,
				291	counter, 1);
				292	return;
				293	}
				294
				295	retry:
				296	task_oncpu_function_call(task, __perf_counter_disable, counter);
				297
				298	spin_lock_irq(&ctx->lock);
				299	/*
				300	* If the counter is still active, we need to retry the cross-call.
				301	*/
				302	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
				303	spin_unlock_irq(&ctx->lock);
				304	goto retry;
				305	}
				306
				307	/*
				308	* Since we have the lock this context can't be scheduled
				309	* in, so we can change the state safely.
				310	*/
				311	if (counter->state == PERF_COUNTER_STATE_INACTIVE)
				312	counter->state = PERF_COUNTER_STATE_OFF;
				313
				314	spin_unlock_irq(&ctx->lock);
				315	}
				316
				317	/*
				318	* Disable a counter and all its children.
				319	*/
				320	static void perf_counter_disable_family(struct perf_counter *counter)
				321	{
				322	struct perf_counter *child;
				323
				324	perf_counter_disable(counter);
				325
				326	/*
				327	* Lock the mutex to protect the list of children
				328	*/
				329	mutex_lock(&counter->mutex);
				330	list_for_each_entry(child, &counter->child_list, child_list)
				331	perf_counter_disable(child);
				332	mutex_unlock(&counter->mutex);
				333	}
				334
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	335	static int
				336	counter_sched_in(struct perf_counter *counter,
				337	struct perf_cpu_context *cpuctx,
				338	struct perf_counter_context *ctx,
				339	int cpu)
				340	{
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	341	if (counter->state <= PERF_COUNTER_STATE_OFF)
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	342	return 0;
				343
				344	counter->state = PERF_COUNTER_STATE_ACTIVE;
				345	counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */
				346	/*
				347	* The new state must be visible before we turn it on in the hardware:
				348	*/
				349	smp_wmb();
				350
				351	if (counter->hw_ops->enable(counter)) {
				352	counter->state = PERF_COUNTER_STATE_INACTIVE;
				353	counter->oncpu = -1;
				354	return -EAGAIN;
				355	}
				356
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	357	if (!is_software_counter(counter))
				358	cpuctx->active_oncpu++;
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	359	ctx->nr_active++;
				360
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	361	if (counter->hw_event.exclusive)
				362	cpuctx->exclusive = 1;
				363
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	364	return 0;
				365	}
				366
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	367	/*
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	368	* Return 1 for a group consisting entirely of software counters,
				369	* 0 if the group contains any hardware counters.
				370	*/
				371	static int is_software_only_group(struct perf_counter *leader)
				372	{
				373	struct perf_counter *counter;
				374
				375	if (!is_software_counter(leader))
				376	return 0;
				377	list_for_each_entry(counter, &leader->sibling_list, list_entry)
				378	if (!is_software_counter(counter))
				379	return 0;
				380	return 1;
				381	}
				382
				383	/*
				384	* Work out whether we can put this counter group on the CPU now.
				385	*/
				386	static int group_can_go_on(struct perf_counter *counter,
				387	struct perf_cpu_context *cpuctx,
				388	int can_add_hw)
				389	{
				390	/*
				391	* Groups consisting entirely of software counters can always go on.
				392	*/
				393	if (is_software_only_group(counter))
				394	return 1;
				395	/*
				396	* If an exclusive group is already on, no other hardware
				397	* counters can go on.
				398	*/
				399	if (cpuctx->exclusive)
				400	return 0;
				401	/*
				402	* If this group is exclusive and there are already
				403	* counters on the CPU, it can't go on.
				404	*/
				405	if (counter->hw_event.exclusive && cpuctx->active_oncpu)
				406	return 0;
				407	/*
				408	* Otherwise, try to add it if all previous groups were able
				409	* to go on.
				410	*/
				411	return can_add_hw;
				412	}
				413
				414	/*
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	415	* Cross CPU call to install and enable a performance counter
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	416	*/
				417	static void __perf_install_in_context(void *info)
				418	{
				419	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
				420	struct perf_counter *counter = info;
				421	struct perf_counter_context *ctx = counter->ctx;
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	422	struct perf_counter *leader = counter->group_leader;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	423	int cpu = smp_processor_id();
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	424	unsigned long flags;
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	425	u64 perf_flags;
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	426	int err;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	427
				428	/*
				429	* If this is a task context, we need to check whether it is
				430	* the current task context of this cpu. If not it has been
				431	* scheduled out before the smp call arrived.
				432	*/
				433	if (ctx->task && cpuctx->task_ctx != ctx)
				434	return;
				435
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	436	curr_rq_lock_irq_save(&flags);
				437	spin_lock(&ctx->lock);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	438
				439	/*
				440	* Protect the list operation against NMI by disabling the
				441	* counters on a global level. NOP for non NMI based counters.
				442	*/
Ingo Molnar	01b2838	2008-12-11 13:45:51 +0100	[diff] [blame]	443	perf_flags = hw_perf_save_disable();
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	444
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	445	list_add_counter(counter, ctx);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	446	ctx->nr_counters++;
Paul Mackerras	c07c99b	2009-02-13 22:10:34 +1100	[diff] [blame^]	447	counter->prev_state = PERF_COUNTER_STATE_OFF;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	448
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	449	/*
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	450	* Don't put the counter on if it is disabled or if
				451	* it is in a group and the group isn't on.
				452	*/
				453	if (counter->state != PERF_COUNTER_STATE_INACTIVE \|\|
				454	(leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE))
				455	goto unlock;
				456
				457	/*
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	458	* An exclusive counter can't go on if there are already active
				459	* hardware counters, and no hardware counter can go on if there
				460	* is already an exclusive counter on.
				461	*/
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	462	if (!group_can_go_on(counter, cpuctx, 1))
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	463	err = -EEXIST;
				464	else
				465	err = counter_sched_in(counter, cpuctx, ctx, cpu);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	466
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	467	if (err) {
				468	/*
				469	* This counter couldn't go on. If it is in a group
				470	* then we have to pull the whole group off.
				471	* If the counter group is pinned then put it in error state.
				472	*/
				473	if (leader != counter)
				474	group_sched_out(leader, cpuctx, ctx);
				475	if (leader->hw_event.pinned)
				476	leader->state = PERF_COUNTER_STATE_ERROR;
				477	}
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	478
				479	if (!err && !ctx->task && cpuctx->max_pertask)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	480	cpuctx->max_pertask--;
				481
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	482	unlock:
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	483	hw_perf_restore(perf_flags);
				484
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	485	spin_unlock(&ctx->lock);
				486	curr_rq_unlock_irq_restore(&flags);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	487	}
				488
				489	/*
				490	* Attach a performance counter to a context
				491	*
				492	* First we add the counter to the list with the hardware enable bit
				493	* in counter->hw_config cleared.
				494	*
				495	* If the counter is attached to a task which is on a CPU we use a smp
				496	* call to enable it in the task context. The task might have been
				497	* scheduled away, but we check this in the smp call again.
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	498	*
				499	* Must be called with ctx->mutex held.
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	500	*/
				501	static void
				502	perf_install_in_context(struct perf_counter_context *ctx,
				503	struct perf_counter *counter,
				504	int cpu)
				505	{
				506	struct task_struct *task = ctx->task;
				507
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	508	if (!task) {
				509	/*
				510	* Per cpu counters are installed via an smp call and
				511	* the install is always sucessful.
				512	*/
				513	smp_call_function_single(cpu, __perf_install_in_context,
				514	counter, 1);
				515	return;
				516	}
				517
				518	counter->task = task;
				519	retry:
				520	task_oncpu_function_call(task, __perf_install_in_context,
				521	counter);
				522
				523	spin_lock_irq(&ctx->lock);
				524	/*
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	525	* we need to retry the smp call.
				526	*/
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	527	if (ctx->is_active && list_empty(&counter->list_entry)) {
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	528	spin_unlock_irq(&ctx->lock);
				529	goto retry;
				530	}
				531
				532	/*
				533	* The lock prevents that this context is scheduled in so we
				534	* can add the counter safely, if it the call above did not
				535	* succeed.
				536	*/
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	537	if (list_empty(&counter->list_entry)) {
				538	list_add_counter(counter, ctx);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	539	ctx->nr_counters++;
				540	}
				541	spin_unlock_irq(&ctx->lock);
				542	}
				543
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	544	/*
				545	* Cross CPU call to enable a performance counter
				546	*/
				547	static void __perf_counter_enable(void *info)
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	548	{
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	549	struct perf_counter *counter = info;
				550	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
				551	struct perf_counter_context *ctx = counter->ctx;
				552	struct perf_counter *leader = counter->group_leader;
				553	unsigned long flags;
				554	int err;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	555
				556	/*
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	557	* If this is a per-task counter, need to check whether this
				558	* counter's task is the current task on this cpu.
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	559	*/
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	560	if (ctx->task && cpuctx->task_ctx != ctx)
				561	return;
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	562
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	563	curr_rq_lock_irq_save(&flags);
				564	spin_lock(&ctx->lock);
				565
Paul Mackerras	c07c99b	2009-02-13 22:10:34 +1100	[diff] [blame^]	566	counter->prev_state = counter->state;
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	567	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
				568	goto unlock;
				569	counter->state = PERF_COUNTER_STATE_INACTIVE;
				570
				571	/*
				572	* If the counter is in a group and isn't the group leader,
				573	* then don't put it on unless the group is on.
				574	*/
				575	if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)
				576	goto unlock;
				577
				578	if (!group_can_go_on(counter, cpuctx, 1))
				579	err = -EEXIST;
				580	else
				581	err = counter_sched_in(counter, cpuctx, ctx,
				582	smp_processor_id());
				583
				584	if (err) {
				585	/*
				586	* If this counter can't go on and it's part of a
				587	* group, then the whole group has to come off.
				588	*/
				589	if (leader != counter)
				590	group_sched_out(leader, cpuctx, ctx);
				591	if (leader->hw_event.pinned)
				592	leader->state = PERF_COUNTER_STATE_ERROR;
				593	}
				594
				595	unlock:
				596	spin_unlock(&ctx->lock);
				597	curr_rq_unlock_irq_restore(&flags);
				598	}
				599
				600	/*
				601	* Enable a counter.
				602	*/
				603	static void perf_counter_enable(struct perf_counter *counter)
				604	{
				605	struct perf_counter_context *ctx = counter->ctx;
				606	struct task_struct *task = ctx->task;
				607
				608	if (!task) {
				609	/*
				610	* Enable the counter on the cpu that it's on
				611	*/
				612	smp_call_function_single(counter->cpu, __perf_counter_enable,
				613	counter, 1);
				614	return;
				615	}
				616
				617	spin_lock_irq(&ctx->lock);
				618	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
				619	goto out;
				620
				621	/*
				622	* If the counter is in error state, clear that first.
				623	* That way, if we see the counter in error state below, we
				624	* know that it has gone back into error state, as distinct
				625	* from the task having been scheduled away before the
				626	* cross-call arrived.
				627	*/
				628	if (counter->state == PERF_COUNTER_STATE_ERROR)
				629	counter->state = PERF_COUNTER_STATE_OFF;
				630
				631	retry:
				632	spin_unlock_irq(&ctx->lock);
				633	task_oncpu_function_call(task, __perf_counter_enable, counter);
				634
				635	spin_lock_irq(&ctx->lock);
				636
				637	/*
				638	* If the context is active and the counter is still off,
				639	* we need to retry the cross-call.
				640	*/
				641	if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF)
				642	goto retry;
				643
				644	/*
				645	* Since we have the lock this context can't be scheduled
				646	* in, so we can change the state safely.
				647	*/
				648	if (counter->state == PERF_COUNTER_STATE_OFF)
				649	counter->state = PERF_COUNTER_STATE_INACTIVE;
				650	out:
				651	spin_unlock_irq(&ctx->lock);
				652	}
				653
				654	/*
				655	* Enable a counter and all its children.
				656	*/
				657	static void perf_counter_enable_family(struct perf_counter *counter)
				658	{
				659	struct perf_counter *child;
				660
				661	perf_counter_enable(counter);
				662
				663	/*
				664	* Lock the mutex to protect the list of children
				665	*/
				666	mutex_lock(&counter->mutex);
				667	list_for_each_entry(child, &counter->child_list, child_list)
				668	perf_counter_enable(child);
				669	mutex_unlock(&counter->mutex);
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	670	}
				671
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	672	void __perf_counter_sched_out(struct perf_counter_context *ctx,
				673	struct perf_cpu_context *cpuctx)
				674	{
				675	struct perf_counter *counter;
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	676	u64 flags;
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	677
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	678	spin_lock(&ctx->lock);
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	679	ctx->is_active = 0;
				680	if (likely(!ctx->nr_counters))
				681	goto out;
				682
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	683	flags = hw_perf_save_disable();
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	684	if (ctx->nr_active) {
				685	list_for_each_entry(counter, &ctx->counter_list, list_entry)
				686	group_sched_out(counter, cpuctx, ctx);
				687	}
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	688	hw_perf_restore(flags);
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	689	out:
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	690	spin_unlock(&ctx->lock);
				691	}
				692
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	693	/*
				694	* Called from scheduler to remove the counters of the current task,
				695	* with interrupts disabled.
				696	*
				697	* We stop each counter and update the counter value in counter->count.
				698	*
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	699	* This does not protect us against NMI, but disable()
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	700	* sets the disabled bit in the control field of counter _before_
				701	* accessing the counter control register. If a NMI hits, then it will
				702	* not restart the counter.
				703	*/
				704	void perf_counter_task_sched_out(struct task_struct *task, int cpu)
				705	{
				706	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
				707	struct perf_counter_context *ctx = &task->perf_counter_ctx;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	708
				709	if (likely(!cpuctx->task_ctx))
				710	return;
				711
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	712	__perf_counter_sched_out(ctx, cpuctx);
				713
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	714	cpuctx->task_ctx = NULL;
				715	}
				716
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	717	static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	718	{
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	719	__perf_counter_sched_out(&cpuctx->ctx, cpuctx);
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	720	}
				721
Ingo Molnar	7995888	2008-12-17 08:54:56 +0100	[diff] [blame]	722	static int
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	723	group_sched_in(struct perf_counter *group_counter,
				724	struct perf_cpu_context *cpuctx,
				725	struct perf_counter_context *ctx,
				726	int cpu)
				727	{
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	728	struct perf_counter counter, partial_group;
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	729	int ret;
				730
				731	if (group_counter->state == PERF_COUNTER_STATE_OFF)
				732	return 0;
				733
				734	ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu);
				735	if (ret)
				736	return ret < 0 ? ret : 0;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	737
Paul Mackerras	c07c99b	2009-02-13 22:10:34 +1100	[diff] [blame^]	738	group_counter->prev_state = group_counter->state;
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	739	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
				740	return -EAGAIN;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	741
				742	/*
				743	* Schedule in siblings as one group (if any):
				744	*/
Ingo Molnar	7995888	2008-12-17 08:54:56 +0100	[diff] [blame]	745	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
Paul Mackerras	c07c99b	2009-02-13 22:10:34 +1100	[diff] [blame^]	746	counter->prev_state = counter->state;
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	747	if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
				748	partial_group = counter;
				749	goto group_error;
				750	}
Ingo Molnar	7995888	2008-12-17 08:54:56 +0100	[diff] [blame]	751	}
				752
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	753	return 0;
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	754
				755	group_error:
				756	/*
				757	* Groups can be scheduled in as one unit only, so undo any
				758	* partial group before returning:
				759	*/
				760	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
				761	if (counter == partial_group)
				762	break;
				763	counter_sched_out(counter, cpuctx, ctx);
				764	}
				765	counter_sched_out(group_counter, cpuctx, ctx);
				766
				767	return -EAGAIN;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	768	}
				769
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	770	static void
				771	__perf_counter_sched_in(struct perf_counter_context *ctx,
				772	struct perf_cpu_context *cpuctx, int cpu)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	773	{
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	774	struct perf_counter *counter;
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	775	u64 flags;
Paul Mackerras	dd0e6ba	2009-01-12 15:11:00 +1100	[diff] [blame]	776	int can_add_hw = 1;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	777
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	778	spin_lock(&ctx->lock);
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	779	ctx->is_active = 1;
				780	if (likely(!ctx->nr_counters))
				781	goto out;
				782
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	783	flags = hw_perf_save_disable();
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	784
				785	/*
				786	* First go through the list and put on any pinned groups
				787	* in order to give them the best chance of going on.
				788	*/
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	789	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	790	if (counter->state <= PERF_COUNTER_STATE_OFF \|\|
				791	!counter->hw_event.pinned)
				792	continue;
				793	if (counter->cpu != -1 && counter->cpu != cpu)
				794	continue;
				795
				796	if (group_can_go_on(counter, cpuctx, 1))
				797	group_sched_in(counter, cpuctx, ctx, cpu);
				798
				799	/*
				800	* If this pinned group hasn't been scheduled,
				801	* put it in error state.
				802	*/
				803	if (counter->state == PERF_COUNTER_STATE_INACTIVE)
				804	counter->state = PERF_COUNTER_STATE_ERROR;
				805	}
				806
				807	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
				808	/*
				809	* Ignore counters in OFF or ERROR state, and
				810	* ignore pinned counters since we did them already.
				811	*/
				812	if (counter->state <= PERF_COUNTER_STATE_OFF \|\|
				813	counter->hw_event.pinned)
				814	continue;
				815
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	816	/*
				817	* Listen to the 'cpu' scheduling filter constraint
				818	* of counters:
				819	*/
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	820	if (counter->cpu != -1 && counter->cpu != cpu)
				821	continue;
				822
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	823	if (group_can_go_on(counter, cpuctx, can_add_hw)) {
Paul Mackerras	dd0e6ba	2009-01-12 15:11:00 +1100	[diff] [blame]	824	if (group_sched_in(counter, cpuctx, ctx, cpu))
				825	can_add_hw = 0;
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	826	}
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	827	}
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	828	hw_perf_restore(flags);
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	829	out:
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	830	spin_unlock(&ctx->lock);
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	831	}
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	832
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	833	/*
				834	* Called from scheduler to add the counters of the current task
				835	* with interrupts disabled.
				836	*
				837	* We restore the counter value and then enable it.
				838	*
				839	* This does not protect us against NMI, but enable()
				840	* sets the enabled bit in the control field of counter _before_
				841	* accessing the counter control register. If a NMI hits, then it will
				842	* keep the counter running.
				843	*/
				844	void perf_counter_task_sched_in(struct task_struct *task, int cpu)
				845	{
				846	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
				847	struct perf_counter_context *ctx = &task->perf_counter_ctx;
				848
				849	__perf_counter_sched_in(ctx, cpuctx, cpu);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	850	cpuctx->task_ctx = ctx;
				851	}
				852
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	853	static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
				854	{
				855	struct perf_counter_context *ctx = &cpuctx->ctx;
				856
				857	__perf_counter_sched_in(ctx, cpuctx, cpu);
				858	}
				859
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	860	int perf_counter_task_disable(void)
				861	{
				862	struct task_struct *curr = current;
				863	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
				864	struct perf_counter *counter;
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	865	unsigned long flags;
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	866	u64 perf_flags;
				867	int cpu;
				868
				869	if (likely(!ctx->nr_counters))
				870	return 0;
				871
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	872	curr_rq_lock_irq_save(&flags);
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	873	cpu = smp_processor_id();
				874
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	875	/* force the update of the task clock: */
				876	__task_delta_exec(curr, 1);
				877
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	878	perf_counter_task_sched_out(curr, cpu);
				879
				880	spin_lock(&ctx->lock);
				881
				882	/*
				883	* Disable all the counters:
				884	*/
				885	perf_flags = hw_perf_save_disable();
				886
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	887	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
				888	if (counter->state != PERF_COUNTER_STATE_ERROR)
				889	counter->state = PERF_COUNTER_STATE_OFF;
				890	}
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	891
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	892	hw_perf_restore(perf_flags);
				893
				894	spin_unlock(&ctx->lock);
				895
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	896	curr_rq_unlock_irq_restore(&flags);
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	897
				898	return 0;
				899	}
				900
				901	int perf_counter_task_enable(void)
				902	{
				903	struct task_struct *curr = current;
				904	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
				905	struct perf_counter *counter;
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	906	unsigned long flags;
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	907	u64 perf_flags;
				908	int cpu;
				909
				910	if (likely(!ctx->nr_counters))
				911	return 0;
				912
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	913	curr_rq_lock_irq_save(&flags);
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	914	cpu = smp_processor_id();
				915
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	916	/* force the update of the task clock: */
				917	__task_delta_exec(curr, 1);
				918
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	919	perf_counter_task_sched_out(curr, cpu);
				920
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	921	spin_lock(&ctx->lock);
				922
				923	/*
				924	* Disable all the counters:
				925	*/
				926	perf_flags = hw_perf_save_disable();
				927
				928	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	929	if (counter->state > PERF_COUNTER_STATE_OFF)
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	930	continue;
Ingo Molnar	6a93070	2008-12-11 15:17:03 +0100	[diff] [blame]	931	counter->state = PERF_COUNTER_STATE_INACTIVE;
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	932	counter->hw_event.disabled = 0;
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	933	}
				934	hw_perf_restore(perf_flags);
				935
				936	spin_unlock(&ctx->lock);
				937
				938	perf_counter_task_sched_in(curr, cpu);
				939
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	940	curr_rq_unlock_irq_restore(&flags);
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	941
				942	return 0;
				943	}
				944
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	945	/*
				946	* Round-robin a context's counters:
				947	*/
				948	static void rotate_ctx(struct perf_counter_context *ctx)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	949	{
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	950	struct perf_counter *counter;
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	951	u64 perf_flags;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	952
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	953	if (!ctx->nr_counters)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	954	return;
				955
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	956	spin_lock(&ctx->lock);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	957	/*
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	958	* Rotate the first entry last (works just fine for group counters too):
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	959	*/
Ingo Molnar	01b2838	2008-12-11 13:45:51 +0100	[diff] [blame]	960	perf_flags = hw_perf_save_disable();
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	961	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
				962	list_del(&counter->list_entry);
				963	list_add_tail(&counter->list_entry, &ctx->counter_list);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	964	break;
				965	}
Ingo Molnar	01b2838	2008-12-11 13:45:51 +0100	[diff] [blame]	966	hw_perf_restore(perf_flags);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	967
				968	spin_unlock(&ctx->lock);
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	969	}
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	970
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	971	void perf_counter_task_tick(struct task_struct *curr, int cpu)
				972	{
				973	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
				974	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
				975	const int rotate_percpu = 0;
				976
				977	if (rotate_percpu)
				978	perf_counter_cpu_sched_out(cpuctx);
				979	perf_counter_task_sched_out(curr, cpu);
				980
				981	if (rotate_percpu)
				982	rotate_ctx(&cpuctx->ctx);
				983	rotate_ctx(ctx);
				984
				985	if (rotate_percpu)
				986	perf_counter_cpu_sched_in(cpuctx, cpu);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	987	perf_counter_task_sched_in(curr, cpu);
				988	}
				989
				990	/*
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	991	* Cross CPU call to read the hardware counter
				992	*/
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	993	static void __read(void *info)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	994	{
Ingo Molnar	621a01e	2008-12-11 12:46:46 +0100	[diff] [blame]	995	struct perf_counter *counter = info;
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	996	unsigned long flags;
Ingo Molnar	621a01e	2008-12-11 12:46:46 +0100	[diff] [blame]	997
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	998	curr_rq_lock_irq_save(&flags);
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	999	counter->hw_ops->read(counter);
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	1000	curr_rq_unlock_irq_restore(&flags);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1001	}
				1002
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1003	static u64 perf_counter_read(struct perf_counter *counter)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1004	{
				1005	/*
				1006	* If counter is enabled and currently active on a CPU, update the
				1007	* value in the counter structure:
				1008	*/
Ingo Molnar	6a93070	2008-12-11 15:17:03 +0100	[diff] [blame]	1009	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1010	smp_call_function_single(counter->oncpu,
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	1011	__read, counter, 1);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1012	}
				1013
Ingo Molnar	ee06094	2008-12-13 09:00:03 +0100	[diff] [blame]	1014	return atomic64_read(&counter->count);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1015	}
				1016
				1017	/*
				1018	* Cross CPU call to switch performance data pointers
				1019	*/
				1020	static void __perf_switch_irq_data(void *info)
				1021	{
				1022	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
				1023	struct perf_counter *counter = info;
				1024	struct perf_counter_context *ctx = counter->ctx;
				1025	struct perf_data *oldirqdata = counter->irqdata;
				1026
				1027	/*
				1028	* If this is a task context, we need to check whether it is
				1029	* the current task context of this cpu. If not it has been
				1030	* scheduled out before the smp call arrived.
				1031	*/
				1032	if (ctx->task) {
				1033	if (cpuctx->task_ctx != ctx)
				1034	return;
				1035	spin_lock(&ctx->lock);
				1036	}
				1037
				1038	/* Change the pointer NMI safe */
				1039	atomic_long_set((atomic_long_t *)&counter->irqdata,
				1040	(unsigned long) counter->usrdata);
				1041	counter->usrdata = oldirqdata;
				1042
				1043	if (ctx->task)
				1044	spin_unlock(&ctx->lock);
				1045	}
				1046
				1047	static struct perf_data perf_switch_irq_data(struct perf_counter counter)
				1048	{
				1049	struct perf_counter_context *ctx = counter->ctx;
				1050	struct perf_data *oldirqdata = counter->irqdata;
				1051	struct task_struct *task = ctx->task;
				1052
				1053	if (!task) {
				1054	smp_call_function_single(counter->cpu,
				1055	__perf_switch_irq_data,
				1056	counter, 1);
				1057	return counter->usrdata;
				1058	}
				1059
				1060	retry:
				1061	spin_lock_irq(&ctx->lock);
Ingo Molnar	6a93070	2008-12-11 15:17:03 +0100	[diff] [blame]	1062	if (counter->state != PERF_COUNTER_STATE_ACTIVE) {
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1063	counter->irqdata = counter->usrdata;
				1064	counter->usrdata = oldirqdata;
				1065	spin_unlock_irq(&ctx->lock);
				1066	return oldirqdata;
				1067	}
				1068	spin_unlock_irq(&ctx->lock);
				1069	task_oncpu_function_call(task, __perf_switch_irq_data, counter);
				1070	/* Might have failed, because task was scheduled out */
				1071	if (counter->irqdata == oldirqdata)
				1072	goto retry;
				1073
				1074	return counter->usrdata;
				1075	}
				1076
				1077	static void put_context(struct perf_counter_context *ctx)
				1078	{
				1079	if (ctx->task)
				1080	put_task_struct(ctx->task);
				1081	}
				1082
				1083	static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
				1084	{
				1085	struct perf_cpu_context *cpuctx;
				1086	struct perf_counter_context *ctx;
				1087	struct task_struct *task;
				1088
				1089	/*
				1090	* If cpu is not a wildcard then this is a percpu counter:
				1091	*/
				1092	if (cpu != -1) {
				1093	/* Must be root to operate on a CPU counter: */
				1094	if (!capable(CAP_SYS_ADMIN))
				1095	return ERR_PTR(-EACCES);
				1096
				1097	if (cpu < 0 \|\| cpu > num_possible_cpus())
				1098	return ERR_PTR(-EINVAL);
				1099
				1100	/*
				1101	* We could be clever and allow to attach a counter to an
				1102	* offline CPU and activate it when the CPU comes up, but
				1103	* that's for later.
				1104	*/
				1105	if (!cpu_isset(cpu, cpu_online_map))
				1106	return ERR_PTR(-ENODEV);
				1107
				1108	cpuctx = &per_cpu(perf_cpu_context, cpu);
				1109	ctx = &cpuctx->ctx;
				1110
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1111	return ctx;
				1112	}
				1113
				1114	rcu_read_lock();
				1115	if (!pid)
				1116	task = current;
				1117	else
				1118	task = find_task_by_vpid(pid);
				1119	if (task)
				1120	get_task_struct(task);
				1121	rcu_read_unlock();
				1122
				1123	if (!task)
				1124	return ERR_PTR(-ESRCH);
				1125
				1126	ctx = &task->perf_counter_ctx;
				1127	ctx->task = task;
				1128
				1129	/* Reuse ptrace permission checks for now. */
				1130	if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
				1131	put_context(ctx);
				1132	return ERR_PTR(-EACCES);
				1133	}
				1134
				1135	return ctx;
				1136	}
				1137
				1138	/*
				1139	* Called when the last reference to the file is gone.
				1140	*/
				1141	static int perf_release(struct inode inode, struct file file)
				1142	{
				1143	struct perf_counter *counter = file->private_data;
				1144	struct perf_counter_context *ctx = counter->ctx;
				1145
				1146	file->private_data = NULL;
				1147
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1148	mutex_lock(&ctx->mutex);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1149	mutex_lock(&counter->mutex);
				1150
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1151	perf_counter_remove_from_context(counter);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1152
				1153	mutex_unlock(&counter->mutex);
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1154	mutex_unlock(&ctx->mutex);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1155
				1156	kfree(counter);
Mike Galbraith	5af7591	2009-02-11 10:53:37 +0100	[diff] [blame]	1157	put_context(ctx);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1158
				1159	return 0;
				1160	}
				1161
				1162	/*
				1163	* Read the performance counter - simple non blocking version for now
				1164	*/
				1165	static ssize_t
				1166	perf_read_hw(struct perf_counter counter, char __user buf, size_t count)
				1167	{
				1168	u64 cntval;
				1169
				1170	if (count != sizeof(cntval))
				1171	return -EINVAL;
				1172
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	1173	/*
				1174	* Return end-of-file for a read on a counter that is in
				1175	* error state (i.e. because it was pinned but it couldn't be
				1176	* scheduled on to the CPU at some point).
				1177	*/
				1178	if (counter->state == PERF_COUNTER_STATE_ERROR)
				1179	return 0;
				1180
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1181	mutex_lock(&counter->mutex);
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1182	cntval = perf_counter_read(counter);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1183	mutex_unlock(&counter->mutex);
				1184
				1185	return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval);
				1186	}
				1187
				1188	static ssize_t
				1189	perf_copy_usrdata(struct perf_data usrdata, char __user buf, size_t count)
				1190	{
				1191	if (!usrdata->len)
				1192	return 0;
				1193
				1194	count = min(count, (size_t)usrdata->len);
				1195	if (copy_to_user(buf, usrdata->data + usrdata->rd_idx, count))
				1196	return -EFAULT;
				1197
				1198	/* Adjust the counters */
				1199	usrdata->len -= count;
				1200	if (!usrdata->len)
				1201	usrdata->rd_idx = 0;
				1202	else
				1203	usrdata->rd_idx += count;
				1204
				1205	return count;
				1206	}
				1207
				1208	static ssize_t
				1209	perf_read_irq_data(struct perf_counter *counter,
				1210	char __user *buf,
				1211	size_t count,
				1212	int nonblocking)
				1213	{
				1214	struct perf_data irqdata, usrdata;
				1215	DECLARE_WAITQUEUE(wait, current);
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	1216	ssize_t res, res2;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1217
				1218	irqdata = counter->irqdata;
				1219	usrdata = counter->usrdata;
				1220
				1221	if (usrdata->len + irqdata->len >= count)
				1222	goto read_pending;
				1223
				1224	if (nonblocking)
				1225	return -EAGAIN;
				1226
				1227	spin_lock_irq(&counter->waitq.lock);
				1228	__add_wait_queue(&counter->waitq, &wait);
				1229	for (;;) {
				1230	set_current_state(TASK_INTERRUPTIBLE);
				1231	if (usrdata->len + irqdata->len >= count)
				1232	break;
				1233
				1234	if (signal_pending(current))
				1235	break;
				1236
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	1237	if (counter->state == PERF_COUNTER_STATE_ERROR)
				1238	break;
				1239
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1240	spin_unlock_irq(&counter->waitq.lock);
				1241	schedule();
				1242	spin_lock_irq(&counter->waitq.lock);
				1243	}
				1244	__remove_wait_queue(&counter->waitq, &wait);
				1245	__set_current_state(TASK_RUNNING);
				1246	spin_unlock_irq(&counter->waitq.lock);
				1247
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	1248	if (usrdata->len + irqdata->len < count &&
				1249	counter->state != PERF_COUNTER_STATE_ERROR)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1250	return -ERESTARTSYS;
				1251	read_pending:
				1252	mutex_lock(&counter->mutex);
				1253
				1254	/* Drain pending data first: */
				1255	res = perf_copy_usrdata(usrdata, buf, count);
				1256	if (res < 0 \|\| res == count)
				1257	goto out;
				1258
				1259	/* Switch irq buffer: */
				1260	usrdata = perf_switch_irq_data(counter);
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	1261	res2 = perf_copy_usrdata(usrdata, buf + res, count - res);
				1262	if (res2 < 0) {
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1263	if (!res)
				1264	res = -EFAULT;
				1265	} else {
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	1266	res += res2;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1267	}
				1268	out:
				1269	mutex_unlock(&counter->mutex);
				1270
				1271	return res;
				1272	}
				1273
				1274	static ssize_t
				1275	perf_read(struct file file, char __user buf, size_t count, loff_t *ppos)
				1276	{
				1277	struct perf_counter *counter = file->private_data;
				1278
Ingo Molnar	9f66a38	2008-12-10 12:33:23 +0100	[diff] [blame]	1279	switch (counter->hw_event.record_type) {
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1280	case PERF_RECORD_SIMPLE:
				1281	return perf_read_hw(counter, buf, count);
				1282
				1283	case PERF_RECORD_IRQ:
				1284	case PERF_RECORD_GROUP:
				1285	return perf_read_irq_data(counter, buf, count,
				1286	file->f_flags & O_NONBLOCK);
				1287	}
				1288	return -EINVAL;
				1289	}
				1290
				1291	static unsigned int perf_poll(struct file file, poll_table wait)
				1292	{
				1293	struct perf_counter *counter = file->private_data;
				1294	unsigned int events = 0;
				1295	unsigned long flags;
				1296
				1297	poll_wait(file, &counter->waitq, wait);
				1298
				1299	spin_lock_irqsave(&counter->waitq.lock, flags);
				1300	if (counter->usrdata->len \|\| counter->irqdata->len)
				1301	events \|= POLLIN;
				1302	spin_unlock_irqrestore(&counter->waitq.lock, flags);
				1303
				1304	return events;
				1305	}
				1306
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1307	static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
				1308	{
				1309	struct perf_counter *counter = file->private_data;
				1310	int err = 0;
				1311
				1312	switch (cmd) {
				1313	case PERF_COUNTER_IOC_ENABLE:
				1314	perf_counter_enable_family(counter);
				1315	break;
				1316	case PERF_COUNTER_IOC_DISABLE:
				1317	perf_counter_disable_family(counter);
				1318	break;
				1319	default:
				1320	err = -ENOTTY;
				1321	}
				1322	return err;
				1323	}
				1324
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1325	static const struct file_operations perf_fops = {
				1326	.release = perf_release,
				1327	.read = perf_read,
				1328	.poll = perf_poll,
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1329	.unlocked_ioctl = perf_ioctl,
				1330	.compat_ioctl = perf_ioctl,
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1331	};
				1332
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	1333	static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1334	{
Paul Mackerras	9abf8a0	2009-01-09 16:26:43 +1100	[diff] [blame]	1335	int cpu = raw_smp_processor_id();
				1336
				1337	atomic64_set(&counter->hw.prev_count, cpu_clock(cpu));
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	1338	return 0;
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1339	}
				1340
Paul Mackerras	9abf8a0	2009-01-09 16:26:43 +1100	[diff] [blame]	1341	static void cpu_clock_perf_counter_update(struct perf_counter *counter)
				1342	{
				1343	int cpu = raw_smp_processor_id();
				1344	s64 prev;
				1345	u64 now;
				1346
				1347	now = cpu_clock(cpu);
				1348	prev = atomic64_read(&counter->hw.prev_count);
				1349	atomic64_set(&counter->hw.prev_count, now);
				1350	atomic64_add(now - prev, &counter->count);
				1351	}
				1352
/*
 * Disable a cpu-clock counter: fold the time elapsed so far into the count.
 */
static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
{
	cpu_clock_perf_counter_update(counter);
}
				1357
				1358	static void cpu_clock_perf_counter_read(struct perf_counter *counter)
				1359	{
Paul Mackerras	9abf8a0	2009-01-09 16:26:43 +1100	[diff] [blame]	1360	cpu_clock_perf_counter_update(counter);
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1361	}
				1362
				1363	static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	1364	.enable = cpu_clock_perf_counter_enable,
				1365	.disable = cpu_clock_perf_counter_disable,
				1366	.read = cpu_clock_perf_counter_read,
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1367	};
				1368
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	1369	/*
				1370	* Called from within the scheduler:
				1371	*/
				1372	static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
Ingo Molnar	bae43c9	2008-12-11 14:03:20 +0100	[diff] [blame]	1373	{
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	1374	struct task_struct *curr = counter->task;
				1375	u64 delta;
				1376
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	1377	delta = __task_delta_exec(curr, update);
				1378
				1379	return curr->se.sum_exec_runtime + delta;
				1380	}
				1381
				1382	static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now)
				1383	{
				1384	u64 prev;
Ingo Molnar	8cb391e	2008-12-14 12:22:31 +0100	[diff] [blame]	1385	s64 delta;
Ingo Molnar	bae43c9	2008-12-11 14:03:20 +0100	[diff] [blame]	1386
Ingo Molnar	8cb391e	2008-12-14 12:22:31 +0100	[diff] [blame]	1387	prev = atomic64_read(&counter->hw.prev_count);
Ingo Molnar	8cb391e	2008-12-14 12:22:31 +0100	[diff] [blame]	1388
				1389	atomic64_set(&counter->hw.prev_count, now);
				1390
				1391	delta = now - prev;
Ingo Molnar	8cb391e	2008-12-14 12:22:31 +0100	[diff] [blame]	1392
				1393	atomic64_add(delta, &counter->count);
Ingo Molnar	bae43c9	2008-12-11 14:03:20 +0100	[diff] [blame]	1394	}
				1395
				1396	static void task_clock_perf_counter_read(struct perf_counter *counter)
				1397	{
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	1398	u64 now = task_clock_perf_counter_val(counter, 1);
				1399
				1400	task_clock_perf_counter_update(counter, now);
Ingo Molnar	8cb391e	2008-12-14 12:22:31 +0100	[diff] [blame]	1401	}
				1402
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	1403	static int task_clock_perf_counter_enable(struct perf_counter *counter)
Ingo Molnar	8cb391e	2008-12-14 12:22:31 +0100	[diff] [blame]	1404	{
Paul Mackerras	c07c99b	2009-02-13 22:10:34 +1100	[diff] [blame^]	1405	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
				1406	atomic64_set(&counter->hw.prev_count,
				1407	task_clock_perf_counter_val(counter, 0));
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	1408
				1409	return 0;
Ingo Molnar	8cb391e	2008-12-14 12:22:31 +0100	[diff] [blame]	1410	}
				1411
				1412	static void task_clock_perf_counter_disable(struct perf_counter *counter)
				1413	{
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	1414	u64 now = task_clock_perf_counter_val(counter, 0);
				1415
				1416	task_clock_perf_counter_update(counter, now);
Ingo Molnar	bae43c9	2008-12-11 14:03:20 +0100	[diff] [blame]	1417	}
				1418
				1419	static const struct hw_perf_counter_ops perf_ops_task_clock = {
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	1420	.enable = task_clock_perf_counter_enable,
				1421	.disable = task_clock_perf_counter_disable,
				1422	.read = task_clock_perf_counter_read,
Ingo Molnar	bae43c9	2008-12-11 14:03:20 +0100	[diff] [blame]	1423	};
				1424
/*
 * Page-fault count of the current CPU; reads as 0 when the kernel is built
 * without CONFIG_VM_EVENT_COUNTERS.
 */
#ifndef CONFIG_VM_EVENT_COUNTERS
#define cpu_page_faults()	0
#else
#define cpu_page_faults()	__get_cpu_var(vm_event_states).event[PGFAULT]
#endif
Ingo Molnar	e06c61a	2008-12-14 14:44:31 +0100	[diff] [blame]	1430
Paul Mackerras	23a185c	2009-02-09 22:42:47 +1100	[diff] [blame]	1431	static u64 get_page_faults(struct perf_counter *counter)
				1432	{
				1433	struct task_struct *curr = counter->ctx->task;
				1434
				1435	if (curr)
				1436	return curr->maj_flt + curr->min_flt;
				1437	return cpu_page_faults();
Ingo Molnar	e06c61a	2008-12-14 14:44:31 +0100	[diff] [blame]	1438	}
				1439
				1440	static void page_faults_perf_counter_update(struct perf_counter *counter)
				1441	{
				1442	u64 prev, now;
				1443	s64 delta;
				1444
				1445	prev = atomic64_read(&counter->hw.prev_count);
Paul Mackerras	23a185c	2009-02-09 22:42:47 +1100	[diff] [blame]	1446	now = get_page_faults(counter);
Ingo Molnar	e06c61a	2008-12-14 14:44:31 +0100	[diff] [blame]	1447
				1448	atomic64_set(&counter->hw.prev_count, now);
				1449
				1450	delta = now - prev;
Ingo Molnar	e06c61a	2008-12-14 14:44:31 +0100	[diff] [blame]	1451
				1452	atomic64_add(delta, &counter->count);
				1453	}
				1454
				1455	static void page_faults_perf_counter_read(struct perf_counter *counter)
				1456	{
				1457	page_faults_perf_counter_update(counter);
				1458	}
				1459
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	1460	static int page_faults_perf_counter_enable(struct perf_counter *counter)
Ingo Molnar	e06c61a	2008-12-14 14:44:31 +0100	[diff] [blame]	1461	{
Paul Mackerras	c07c99b	2009-02-13 22:10:34 +1100	[diff] [blame^]	1462	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
				1463	atomic64_set(&counter->hw.prev_count, get_page_faults(counter));
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	1464	return 0;
Ingo Molnar	e06c61a	2008-12-14 14:44:31 +0100	[diff] [blame]	1465	}
				1466
				1467	static void page_faults_perf_counter_disable(struct perf_counter *counter)
				1468	{
				1469	page_faults_perf_counter_update(counter);
				1470	}
				1471
				1472	static const struct hw_perf_counter_ops perf_ops_page_faults = {
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	1473	.enable = page_faults_perf_counter_enable,
				1474	.disable = page_faults_perf_counter_disable,
				1475	.read = page_faults_perf_counter_read,
Ingo Molnar	e06c61a	2008-12-14 14:44:31 +0100	[diff] [blame]	1476	};
				1477
Paul Mackerras	23a185c	2009-02-09 22:42:47 +1100	[diff] [blame]	1478	static u64 get_context_switches(struct perf_counter *counter)
Ingo Molnar	5d6a27d	2008-12-14 12:28:33 +0100	[diff] [blame]	1479	{
Paul Mackerras	23a185c	2009-02-09 22:42:47 +1100	[diff] [blame]	1480	struct task_struct *curr = counter->ctx->task;
Ingo Molnar	5d6a27d	2008-12-14 12:28:33 +0100	[diff] [blame]	1481
Paul Mackerras	23a185c	2009-02-09 22:42:47 +1100	[diff] [blame]	1482	if (curr)
				1483	return curr->nvcsw + curr->nivcsw;
				1484	return cpu_nr_switches(smp_processor_id());
Ingo Molnar	5d6a27d	2008-12-14 12:28:33 +0100	[diff] [blame]	1485	}
				1486
				1487	static void context_switches_perf_counter_update(struct perf_counter *counter)
				1488	{
				1489	u64 prev, now;
				1490	s64 delta;
				1491
				1492	prev = atomic64_read(&counter->hw.prev_count);
Paul Mackerras	23a185c	2009-02-09 22:42:47 +1100	[diff] [blame]	1493	now = get_context_switches(counter);
Ingo Molnar	5d6a27d	2008-12-14 12:28:33 +0100	[diff] [blame]	1494
				1495	atomic64_set(&counter->hw.prev_count, now);
				1496
				1497	delta = now - prev;
Ingo Molnar	5d6a27d	2008-12-14 12:28:33 +0100	[diff] [blame]	1498
				1499	atomic64_add(delta, &counter->count);
				1500	}
				1501
				1502	static void context_switches_perf_counter_read(struct perf_counter *counter)
				1503	{
				1504	context_switches_perf_counter_update(counter);
				1505	}
				1506
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	1507	static int context_switches_perf_counter_enable(struct perf_counter *counter)
Ingo Molnar	5d6a27d	2008-12-14 12:28:33 +0100	[diff] [blame]	1508	{
Paul Mackerras	c07c99b	2009-02-13 22:10:34 +1100	[diff] [blame^]	1509	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
				1510	atomic64_set(&counter->hw.prev_count,
				1511	get_context_switches(counter));
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	1512	return 0;
Ingo Molnar	5d6a27d	2008-12-14 12:28:33 +0100	[diff] [blame]	1513	}
				1514
				1515	static void context_switches_perf_counter_disable(struct perf_counter *counter)
				1516	{
				1517	context_switches_perf_counter_update(counter);
				1518	}
				1519
				1520	static const struct hw_perf_counter_ops perf_ops_context_switches = {
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	1521	.enable = context_switches_perf_counter_enable,
				1522	.disable = context_switches_perf_counter_disable,
				1523	.read = context_switches_perf_counter_read,
Ingo Molnar	5d6a27d	2008-12-14 12:28:33 +0100	[diff] [blame]	1524	};
				1525
Paul Mackerras	23a185c	2009-02-09 22:42:47 +1100	[diff] [blame]	1526	static inline u64 get_cpu_migrations(struct perf_counter *counter)
Ingo Molnar	6c594c2	2008-12-14 12:34:15 +0100	[diff] [blame]	1527	{
Paul Mackerras	23a185c	2009-02-09 22:42:47 +1100	[diff] [blame]	1528	struct task_struct *curr = counter->ctx->task;
				1529
				1530	if (curr)
				1531	return curr->se.nr_migrations;
				1532	return cpu_nr_migrations(smp_processor_id());
Ingo Molnar	6c594c2	2008-12-14 12:34:15 +0100	[diff] [blame]	1533	}
				1534
				1535	static void cpu_migrations_perf_counter_update(struct perf_counter *counter)
				1536	{
				1537	u64 prev, now;
				1538	s64 delta;
				1539
				1540	prev = atomic64_read(&counter->hw.prev_count);
Paul Mackerras	23a185c	2009-02-09 22:42:47 +1100	[diff] [blame]	1541	now = get_cpu_migrations(counter);
Ingo Molnar	6c594c2	2008-12-14 12:34:15 +0100	[diff] [blame]	1542
				1543	atomic64_set(&counter->hw.prev_count, now);
				1544
				1545	delta = now - prev;
Ingo Molnar	6c594c2	2008-12-14 12:34:15 +0100	[diff] [blame]	1546
				1547	atomic64_add(delta, &counter->count);
				1548	}
				1549
				1550	static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
				1551	{
				1552	cpu_migrations_perf_counter_update(counter);
				1553	}
				1554
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	1555	static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
Ingo Molnar	6c594c2	2008-12-14 12:34:15 +0100	[diff] [blame]	1556	{
Paul Mackerras	c07c99b	2009-02-13 22:10:34 +1100	[diff] [blame^]	1557	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
				1558	atomic64_set(&counter->hw.prev_count,
				1559	get_cpu_migrations(counter));
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	1560	return 0;
Ingo Molnar	6c594c2	2008-12-14 12:34:15 +0100	[diff] [blame]	1561	}
				1562
				1563	static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
				1564	{
				1565	cpu_migrations_perf_counter_update(counter);
				1566	}
				1567
				1568	static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	1569	.enable = cpu_migrations_perf_counter_enable,
				1570	.disable = cpu_migrations_perf_counter_disable,
				1571	.read = cpu_migrations_perf_counter_read,
Ingo Molnar	6c594c2	2008-12-14 12:34:15 +0100	[diff] [blame]	1572	};
				1573
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1574	static const struct hw_perf_counter_ops *
				1575	sw_perf_counter_init(struct perf_counter *counter)
				1576	{
				1577	const struct hw_perf_counter_ops *hw_ops = NULL;
				1578
Paul Mackerras	0475f9e	2009-02-11 14:35:35 +1100	[diff] [blame]	1579	/*
				1580	* Software counters (currently) can't in general distinguish
				1581	* between user, kernel and hypervisor events.
				1582	* However, context switches and cpu migrations are considered
				1583	* to be kernel events, and page faults are never hypervisor
				1584	* events.
				1585	*/
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1586	switch (counter->hw_event.type) {
				1587	case PERF_COUNT_CPU_CLOCK:
Paul Mackerras	0475f9e	2009-02-11 14:35:35 +1100	[diff] [blame]	1588	if (!(counter->hw_event.exclude_user \|\|
				1589	counter->hw_event.exclude_kernel \|\|
				1590	counter->hw_event.exclude_hv))
				1591	hw_ops = &perf_ops_cpu_clock;
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1592	break;
Ingo Molnar	bae43c9	2008-12-11 14:03:20 +0100	[diff] [blame]	1593	case PERF_COUNT_TASK_CLOCK:
Paul Mackerras	0475f9e	2009-02-11 14:35:35 +1100	[diff] [blame]	1594	if (counter->hw_event.exclude_user \|\|
				1595	counter->hw_event.exclude_kernel \|\|
				1596	counter->hw_event.exclude_hv)
				1597	break;
Paul Mackerras	23a185c	2009-02-09 22:42:47 +1100	[diff] [blame]	1598	/*
				1599	* If the user instantiates this as a per-cpu counter,
				1600	* use the cpu_clock counter instead.
				1601	*/
				1602	if (counter->ctx->task)
				1603	hw_ops = &perf_ops_task_clock;
				1604	else
				1605	hw_ops = &perf_ops_cpu_clock;
Ingo Molnar	bae43c9	2008-12-11 14:03:20 +0100	[diff] [blame]	1606	break;
Ingo Molnar	e06c61a	2008-12-14 14:44:31 +0100	[diff] [blame]	1607	case PERF_COUNT_PAGE_FAULTS:
Paul Mackerras	0475f9e	2009-02-11 14:35:35 +1100	[diff] [blame]	1608	if (!(counter->hw_event.exclude_user \|\|
				1609	counter->hw_event.exclude_kernel))
				1610	hw_ops = &perf_ops_page_faults;
Ingo Molnar	e06c61a	2008-12-14 14:44:31 +0100	[diff] [blame]	1611	break;
Ingo Molnar	5d6a27d	2008-12-14 12:28:33 +0100	[diff] [blame]	1612	case PERF_COUNT_CONTEXT_SWITCHES:
Paul Mackerras	0475f9e	2009-02-11 14:35:35 +1100	[diff] [blame]	1613	if (!counter->hw_event.exclude_kernel)
				1614	hw_ops = &perf_ops_context_switches;
Ingo Molnar	5d6a27d	2008-12-14 12:28:33 +0100	[diff] [blame]	1615	break;
Ingo Molnar	6c594c2	2008-12-14 12:34:15 +0100	[diff] [blame]	1616	case PERF_COUNT_CPU_MIGRATIONS:
Paul Mackerras	0475f9e	2009-02-11 14:35:35 +1100	[diff] [blame]	1617	if (!counter->hw_event.exclude_kernel)
				1618	hw_ops = &perf_ops_cpu_migrations;
Ingo Molnar	6c594c2	2008-12-14 12:34:15 +0100	[diff] [blame]	1619	break;
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1620	default:
				1621	break;
				1622	}
				1623	return hw_ops;
				1624	}
				1625
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1626	/*
				1627	* Allocate and initialize a counter structure
				1628	*/
				1629	static struct perf_counter *
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1630	perf_counter_alloc(struct perf_counter_hw_event *hw_event,
				1631	int cpu,
Paul Mackerras	23a185c	2009-02-09 22:42:47 +1100	[diff] [blame]	1632	struct perf_counter_context *ctx,
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1633	struct perf_counter *group_leader,
				1634	gfp_t gfpflags)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1635	{
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1636	const struct hw_perf_counter_ops *hw_ops;
Ingo Molnar	621a01e	2008-12-11 12:46:46 +0100	[diff] [blame]	1637	struct perf_counter *counter;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1638
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1639	counter = kzalloc(sizeof(*counter), gfpflags);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1640	if (!counter)
				1641	return NULL;
				1642
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1643	/*
				1644	* Single counters are their own group leaders, with an
				1645	* empty sibling list:
				1646	*/
				1647	if (!group_leader)
				1648	group_leader = counter;
				1649
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1650	mutex_init(&counter->mutex);
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1651	INIT_LIST_HEAD(&counter->list_entry);
				1652	INIT_LIST_HEAD(&counter->sibling_list);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1653	init_waitqueue_head(&counter->waitq);
				1654
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1655	INIT_LIST_HEAD(&counter->child_list);
				1656
Ingo Molnar	9f66a38	2008-12-10 12:33:23 +0100	[diff] [blame]	1657	counter->irqdata = &counter->data[0];
				1658	counter->usrdata = &counter->data[1];
				1659	counter->cpu = cpu;
				1660	counter->hw_event = *hw_event;
				1661	counter->wakeup_pending = 0;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1662	counter->group_leader = group_leader;
Ingo Molnar	621a01e	2008-12-11 12:46:46 +0100	[diff] [blame]	1663	counter->hw_ops = NULL;
Paul Mackerras	23a185c	2009-02-09 22:42:47 +1100	[diff] [blame]	1664	counter->ctx = ctx;
Ingo Molnar	621a01e	2008-12-11 12:46:46 +0100	[diff] [blame]	1665
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	1666	counter->state = PERF_COUNTER_STATE_INACTIVE;
Ingo Molnar	a86ed50	2008-12-17 00:43:10 +0100	[diff] [blame]	1667	if (hw_event->disabled)
				1668	counter->state = PERF_COUNTER_STATE_OFF;
				1669
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1670	hw_ops = NULL;
				1671	if (!hw_event->raw && hw_event->type < 0)
				1672	hw_ops = sw_perf_counter_init(counter);
Paul Mackerras	23a185c	2009-02-09 22:42:47 +1100	[diff] [blame]	1673	else
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1674	hw_ops = hw_perf_counter_init(counter);
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1675
Ingo Molnar	621a01e	2008-12-11 12:46:46 +0100	[diff] [blame]	1676	if (!hw_ops) {
				1677	kfree(counter);
				1678	return NULL;
				1679	}
				1680	counter->hw_ops = hw_ops;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1681
				1682	return counter;
				1683	}
				1684
				1685	/**
Ingo Molnar	9f66a38	2008-12-10 12:33:23 +0100	[diff] [blame]	1686	* sys_perf_task_open - open a performance counter, associate it to a task/cpu
				1687	*
				1688	* @hw_event_uptr: event type attributes for monitoring/sampling
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1689	* @pid: target pid
Ingo Molnar	9f66a38	2008-12-10 12:33:23 +0100	[diff] [blame]	1690	* @cpu: target cpu
				1691	* @group_fd: group leader counter fd
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1692	*/
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	1693	asmlinkage int
				1694	sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
				1695	pid_t pid, int cpu, int group_fd)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1696	{
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1697	struct perf_counter counter, group_leader;
Ingo Molnar	9f66a38	2008-12-10 12:33:23 +0100	[diff] [blame]	1698	struct perf_counter_hw_event hw_event;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1699	struct perf_counter_context *ctx;
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1700	struct file *counter_file = NULL;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1701	struct file *group_file = NULL;
				1702	int fput_needed = 0;
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1703	int fput_needed2 = 0;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1704	int ret;
				1705
Ingo Molnar	9f66a38	2008-12-10 12:33:23 +0100	[diff] [blame]	1706	if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
Thomas Gleixner	eab656a	2008-12-08 19:26:59 +0100	[diff] [blame]	1707	return -EFAULT;
				1708
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1709	/*
Ingo Molnar	ccff286	2008-12-11 11:26:29 +0100	[diff] [blame]	1710	* Get the target context (task or percpu):
				1711	*/
				1712	ctx = find_get_context(pid, cpu);
				1713	if (IS_ERR(ctx))
				1714	return PTR_ERR(ctx);
				1715
				1716	/*
				1717	* Look up the group leader (we will attach this counter to it):
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1718	*/
				1719	group_leader = NULL;
				1720	if (group_fd != -1) {
				1721	ret = -EINVAL;
				1722	group_file = fget_light(group_fd, &fput_needed);
				1723	if (!group_file)
Ingo Molnar	ccff286	2008-12-11 11:26:29 +0100	[diff] [blame]	1724	goto err_put_context;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1725	if (group_file->f_op != &perf_fops)
Ingo Molnar	ccff286	2008-12-11 11:26:29 +0100	[diff] [blame]	1726	goto err_put_context;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1727
				1728	group_leader = group_file->private_data;
				1729	/*
Ingo Molnar	ccff286	2008-12-11 11:26:29 +0100	[diff] [blame]	1730	* Do not allow a recursive hierarchy (this new sibling
				1731	* becoming part of another group-sibling):
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1732	*/
Ingo Molnar	ccff286	2008-12-11 11:26:29 +0100	[diff] [blame]	1733	if (group_leader->group_leader != group_leader)
				1734	goto err_put_context;
				1735	/*
				1736	* Do not allow to attach to a group in a different
				1737	* task or CPU context:
				1738	*/
				1739	if (group_leader->ctx != ctx)
				1740	goto err_put_context;
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame]	1741	/*
				1742	* Only a group leader can be exclusive or pinned
				1743	*/
				1744	if (hw_event.exclusive \|\| hw_event.pinned)
				1745	goto err_put_context;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1746	}
				1747
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1748	ret = -EINVAL;
Paul Mackerras	23a185c	2009-02-09 22:42:47 +1100	[diff] [blame]	1749	counter = perf_counter_alloc(&hw_event, cpu, ctx, group_leader,
				1750	GFP_KERNEL);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1751	if (!counter)
				1752	goto err_put_context;
				1753
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1754	ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
				1755	if (ret < 0)
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1756	goto err_free_put_context;
				1757
				1758	counter_file = fget_light(ret, &fput_needed2);
				1759	if (!counter_file)
				1760	goto err_free_put_context;
				1761
				1762	counter->filp = counter_file;
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1763	mutex_lock(&ctx->mutex);
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1764	perf_install_in_context(ctx, counter, cpu);
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1765	mutex_unlock(&ctx->mutex);
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1766
				1767	fput_light(counter_file, fput_needed2);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1768
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1769	out_fput:
				1770	fput_light(group_file, fput_needed);
				1771
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1772	return ret;
				1773
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1774	err_free_put_context:
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1775	kfree(counter);
				1776
				1777	err_put_context:
				1778	put_context(ctx);
				1779
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1780	goto out_fput;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1781	}
				1782
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1783	/*
				1784	* Initialize the perf_counter context in a task_struct:
				1785	*/
				1786	static void
				1787	__perf_counter_init_context(struct perf_counter_context *ctx,
				1788	struct task_struct *task)
				1789	{
				1790	memset(ctx, 0, sizeof(*ctx));
				1791	spin_lock_init(&ctx->lock);
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1792	mutex_init(&ctx->mutex);
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1793	INIT_LIST_HEAD(&ctx->counter_list);
				1794	ctx->task = task;
				1795	}
				1796
				1797	/*
				1798	* inherit a counter from parent task to child task:
				1799	*/
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1800	static struct perf_counter *
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1801	inherit_counter(struct perf_counter *parent_counter,
				1802	struct task_struct *parent,
				1803	struct perf_counter_context *parent_ctx,
				1804	struct task_struct *child,
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1805	struct perf_counter *group_leader,
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1806	struct perf_counter_context *child_ctx)
				1807	{
				1808	struct perf_counter *child_counter;
				1809
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1810	/*
				1811	* Instead of creating recursive hierarchies of counters,
				1812	* we link inherited counters back to the original parent,
				1813	* which has a filp for sure, which we use as the reference
				1814	* count:
				1815	*/
				1816	if (parent_counter->parent)
				1817	parent_counter = parent_counter->parent;
				1818
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1819	child_counter = perf_counter_alloc(&parent_counter->hw_event,
Paul Mackerras	23a185c	2009-02-09 22:42:47 +1100	[diff] [blame]	1820	parent_counter->cpu, child_ctx,
				1821	group_leader, GFP_KERNEL);
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1822	if (!child_counter)
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1823	return NULL;
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1824
				1825	/*
				1826	* Link it up in the child's context:
				1827	*/
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1828	child_counter->task = child;
				1829	list_add_counter(child_counter, child_ctx);
				1830	child_ctx->nr_counters++;
				1831
				1832	child_counter->parent = parent_counter;
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1833	/*
				1834	* inherit into child's child as well:
				1835	*/
				1836	child_counter->hw_event.inherit = 1;
				1837
				1838	/*
				1839	* Get a reference to the parent filp - we will fput it
				1840	* when the child counter exits. This is safe to do because
				1841	* we are in the parent and we know that the filp still
				1842	* exists and has a nonzero count:
				1843	*/
				1844	atomic_long_inc(&parent_counter->filp->f_count);
				1845
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1846	/*
				1847	* Link this into the parent counter's child list
				1848	*/
				1849	mutex_lock(&parent_counter->mutex);
				1850	list_add_tail(&child_counter->child_list, &parent_counter->child_list);
				1851
				1852	/*
				1853	* Make the child state follow the state of the parent counter,
				1854	* not its hw_event.disabled bit. We hold the parent's mutex,
				1855	* so we won't race with perf_counter_{en,dis}able_family.
				1856	*/
				1857	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
				1858	child_counter->state = PERF_COUNTER_STATE_INACTIVE;
				1859	else
				1860	child_counter->state = PERF_COUNTER_STATE_OFF;
				1861
				1862	mutex_unlock(&parent_counter->mutex);
				1863
				1864	return child_counter;
				1865	}
				1866
				1867	static int inherit_group(struct perf_counter *parent_counter,
				1868	struct task_struct *parent,
				1869	struct perf_counter_context *parent_ctx,
				1870	struct task_struct *child,
				1871	struct perf_counter_context *child_ctx)
				1872	{
				1873	struct perf_counter *leader;
				1874	struct perf_counter *sub;
				1875
				1876	leader = inherit_counter(parent_counter, parent, parent_ctx,
				1877	child, NULL, child_ctx);
				1878	if (!leader)
				1879	return -ENOMEM;
				1880	list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) {
				1881	if (!inherit_counter(sub, parent, parent_ctx,
				1882	child, leader, child_ctx))
				1883	return -ENOMEM;
				1884	}
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1885	return 0;
				1886	}
				1887
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1888	static void sync_child_counter(struct perf_counter *child_counter,
				1889	struct perf_counter *parent_counter)
				1890	{
				1891	u64 parent_val, child_val;
				1892
				1893	parent_val = atomic64_read(&parent_counter->count);
				1894	child_val = atomic64_read(&child_counter->count);
				1895
				1896	/*
				1897	* Add back the child's count to the parent's count:
				1898	*/
				1899	atomic64_add(child_val, &parent_counter->count);
				1900
				1901	/*
				1902	* Remove this counter from the parent's list
				1903	*/
				1904	mutex_lock(&parent_counter->mutex);
				1905	list_del_init(&child_counter->child_list);
				1906	mutex_unlock(&parent_counter->mutex);
				1907
				1908	/*
				1909	* Release the parent counter, if this was the last
				1910	* reference to it.
				1911	*/
				1912	fput(parent_counter->filp);
				1913	}
				1914
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1915	static void
				1916	__perf_counter_exit_task(struct task_struct *child,
				1917	struct perf_counter *child_counter,
				1918	struct perf_counter_context *child_ctx)
				1919	{
				1920	struct perf_counter *parent_counter;
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1921	struct perf_counter sub, tmp;
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1922
				1923	/*
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	1924	* If we do not self-reap then we have to wait for the
				1925	* child task to unschedule (it will happen for sure),
				1926	* so that its counter is at its final count. (This
				1927	* condition triggers rarely - child tasks usually get
				1928	* off their CPU before the parent has a chance to
				1929	* get this far into the reaping action)
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1930	*/
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	1931	if (child != current) {
				1932	wait_task_inactive(child, 0);
				1933	list_del_init(&child_counter->list_entry);
				1934	} else {
Ingo Molnar	0cc0c02	2008-12-14 23:20:36 +0100	[diff] [blame]	1935	struct perf_cpu_context *cpuctx;
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	1936	unsigned long flags;
				1937	u64 perf_flags;
				1938
				1939	/*
				1940	* Disable and unlink this counter.
				1941	*
				1942	* Be careful about zapping the list - IRQ/NMI context
				1943	* could still be processing it:
				1944	*/
				1945	curr_rq_lock_irq_save(&flags);
				1946	perf_flags = hw_perf_save_disable();
Ingo Molnar	0cc0c02	2008-12-14 23:20:36 +0100	[diff] [blame]	1947
				1948	cpuctx = &__get_cpu_var(perf_cpu_context);
				1949
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1950	group_sched_out(child_counter, cpuctx, child_ctx);
Ingo Molnar	0cc0c02	2008-12-14 23:20:36 +0100	[diff] [blame]	1951
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	1952	list_del_init(&child_counter->list_entry);
				1953
				1954	child_ctx->nr_counters--;
				1955
				1956	hw_perf_restore(perf_flags);
				1957	curr_rq_unlock_irq_restore(&flags);
Ingo Molnar	0cc0c02	2008-12-14 23:20:36 +0100	[diff] [blame]	1958	}
				1959
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1960	parent_counter = child_counter->parent;
				1961	/*
				1962	* It can happen that parent exits first, and has counters
				1963	* that are still around due to the child reference. These
				1964	* counters need to be zapped - but otherwise linger.
				1965	*/
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1966	if (parent_counter) {
				1967	sync_child_counter(child_counter, parent_counter);
				1968	list_for_each_entry_safe(sub, tmp, &child_counter->sibling_list,
				1969	list_entry) {
Paul Mackerras	4bcf349	2009-02-11 13:53:19 +0100	[diff] [blame]	1970	if (sub->parent) {
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1971	sync_child_counter(sub, sub->parent);
Paul Mackerras	4bcf349	2009-02-11 13:53:19 +0100	[diff] [blame]	1972	kfree(sub);
				1973	}
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1974	}
Mike Galbraith	65d3708	2009-01-29 14:06:52 +0100	[diff] [blame]	1975	kfree(child_counter);
Paul Mackerras	4bcf349	2009-02-11 13:53:19 +0100	[diff] [blame]	1976	}
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1977	}
				1978
				1979	/*
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1980	* When a child task exits, feed back counter values to parent counters.
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1981	*
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	1982	* Note: we may be running in child context, but the PID is not hashed
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1983	* anymore so new counters will not be added.
				1984	*/
				1985	void perf_counter_exit_task(struct task_struct *child)
				1986	{
				1987	struct perf_counter child_counter, tmp;
				1988	struct perf_counter_context *child_ctx;
				1989
				1990	child_ctx = &child->perf_counter_ctx;
				1991
				1992	if (likely(!child_ctx->nr_counters))
				1993	return;
				1994
				1995	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
				1996	list_entry)
				1997	__perf_counter_exit_task(child, child_counter, child_ctx);
				1998	}
				1999
				2000	/*
				2001	* Initialize the perf_counter context in task_struct
				2002	*/
				2003	void perf_counter_init_task(struct task_struct *child)
				2004	{
				2005	struct perf_counter_context child_ctx, parent_ctx;
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	2006	struct perf_counter *counter;
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	2007	struct task_struct *parent = current;
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	2008
				2009	child_ctx = &child->perf_counter_ctx;
				2010	parent_ctx = &parent->perf_counter_ctx;
				2011
				2012	__perf_counter_init_context(child_ctx, child);
				2013
				2014	/*
				2015	* This is executed from the parent task context, so inherit
				2016	* counters that have been marked for cloning:
				2017	*/
				2018
				2019	if (likely(!parent_ctx->nr_counters))
				2020	return;
				2021
				2022	/*
				2023	* Lock the parent list. No need to lock the child - not PID
				2024	* hashed yet and not running, so nobody can access it.
				2025	*/
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	2026	mutex_lock(&parent_ctx->mutex);
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	2027
				2028	/*
				2029	* We dont have to disable NMIs - we are only looking at
				2030	* the list, not manipulating it:
				2031	*/
				2032	list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) {
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	2033	if (!counter->hw_event.inherit)
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	2034	continue;
				2035
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	2036	if (inherit_group(counter, parent,
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	2037	parent_ctx, child, child_ctx))
				2038	break;
				2039	}
				2040
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	2041	mutex_unlock(&parent_ctx->mutex);
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	2042	}
				2043
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	2044	static void __cpuinit perf_counter_init_cpu(int cpu)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	2045	{
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	2046	struct perf_cpu_context *cpuctx;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	2047
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	2048	cpuctx = &per_cpu(perf_cpu_context, cpu);
				2049	__perf_counter_init_context(&cpuctx->ctx, NULL);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	2050
				2051	mutex_lock(&perf_resource_mutex);
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	2052	cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	2053	mutex_unlock(&perf_resource_mutex);
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	2054
Paul Mackerras	01d0287	2009-01-14 13:44:19 +1100	[diff] [blame]	2055	hw_perf_counter_setup(cpu);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	2056	}
				2057
				2058	#ifdef CONFIG_HOTPLUG_CPU
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	2059	static void __perf_counter_exit_cpu(void *info)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	2060	{
				2061	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
				2062	struct perf_counter_context *ctx = &cpuctx->ctx;
				2063	struct perf_counter counter, tmp;
				2064
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	2065	list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry)
				2066	__perf_counter_remove_from_context(counter);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	2067	}
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	2068	static void perf_counter_exit_cpu(int cpu)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	2069	{
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	2070	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
				2071	struct perf_counter_context *ctx = &cpuctx->ctx;
				2072
				2073	mutex_lock(&ctx->mutex);
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	2074	smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1);
Paul Mackerras	d859e29	2009-01-17 18:10:22 +1100	[diff] [blame]	2075	mutex_unlock(&ctx->mutex);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	2076	}
				2077	#else
/* No-op variant used when CONFIG_HOTPLUG_CPU is not set. */
static inline void perf_counter_exit_cpu(int cpu)
{
}
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	2079	#endif
				2080
				2081	static int __cpuinit
				2082	perf_cpu_notify(struct notifier_block self, unsigned long action, void hcpu)
				2083	{
				2084	unsigned int cpu = (long)hcpu;
				2085
				2086	switch (action) {
				2087
				2088	case CPU_UP_PREPARE:
				2089	case CPU_UP_PREPARE_FROZEN:
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	2090	perf_counter_init_cpu(cpu);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	2091	break;
				2092
				2093	case CPU_DOWN_PREPARE:
				2094	case CPU_DOWN_PREPARE_FROZEN:
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	2095	perf_counter_exit_cpu(cpu);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	2096	break;
				2097
				2098	default:
				2099	break;
				2100	}
				2101
				2102	return NOTIFY_OK;
				2103	}
				2104
				2105	static struct notifier_block __cpuinitdata perf_cpu_nb = {
				2106	.notifier_call = perf_cpu_notify,
				2107	};
				2108
				2109	static int __init perf_counter_init(void)
				2110	{
				2111	perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
				2112	(void *)(long)smp_processor_id());
				2113	register_cpu_notifier(&perf_cpu_nb);
				2114
				2115	return 0;
				2116	}
				2117	early_initcall(perf_counter_init);
				2118
				2119	static ssize_t perf_show_reserve_percpu(struct sysdev_class class, char buf)
				2120	{
				2121	return sprintf(buf, "%d\n", perf_reserved_percpu);
				2122	}
				2123
				2124	static ssize_t
				2125	perf_set_reserve_percpu(struct sysdev_class *class,
				2126	const char *buf,
				2127	size_t count)
				2128	{
				2129	struct perf_cpu_context *cpuctx;
				2130	unsigned long val;
				2131	int err, cpu, mpt;
				2132
				2133	err = strict_strtoul(buf, 10, &val);
				2134	if (err)
				2135	return err;
				2136	if (val > perf_max_counters)
				2137	return -EINVAL;
				2138
				2139	mutex_lock(&perf_resource_mutex);
				2140	perf_reserved_percpu = val;
				2141	for_each_online_cpu(cpu) {
				2142	cpuctx = &per_cpu(perf_cpu_context, cpu);
				2143	spin_lock_irq(&cpuctx->ctx.lock);
				2144	mpt = min(perf_max_counters - cpuctx->ctx.nr_counters,
				2145	perf_max_counters - perf_reserved_percpu);
				2146	cpuctx->max_pertask = mpt;
				2147	spin_unlock_irq(&cpuctx->ctx.lock);
				2148	}
				2149	mutex_unlock(&perf_resource_mutex);
				2150
				2151	return count;
				2152	}
				2153
				2154	static ssize_t perf_show_overcommit(struct sysdev_class class, char buf)
				2155	{
				2156	return sprintf(buf, "%d\n", perf_overcommit);
				2157	}
				2158
				2159	static ssize_t
				2160	perf_set_overcommit(struct sysdev_class class, const char buf, size_t count)
				2161	{
				2162	unsigned long val;
				2163	int err;
				2164
				2165	err = strict_strtoul(buf, 10, &val);
				2166	if (err)
				2167	return err;
				2168	if (val > 1)
				2169	return -EINVAL;
				2170
				2171	mutex_lock(&perf_resource_mutex);
				2172	perf_overcommit = val;
				2173	mutex_unlock(&perf_resource_mutex);
				2174
				2175	return count;
				2176	}
				2177
				2178	static SYSDEV_CLASS_ATTR(
				2179	reserve_percpu,
				2180	0644,
				2181	perf_show_reserve_percpu,
				2182	perf_set_reserve_percpu
				2183	);
				2184
				2185	static SYSDEV_CLASS_ATTR(
				2186	overcommit,
				2187	0644,
				2188	perf_show_overcommit,
				2189	perf_set_overcommit
				2190	);
				2191
				2192	static struct attribute *perfclass_attrs[] = {
				2193	&attr_reserve_percpu.attr,
				2194	&attr_overcommit.attr,
				2195	NULL
				2196	};
				2197
				2198	static struct attribute_group perfclass_attr_group = {
				2199	.attrs = perfclass_attrs,
				2200	.name = "perf_counters",
				2201	};
				2202
				2203	static int __init perf_counter_sysfs_init(void)
				2204	{
				2205	return sysfs_create_group(&cpu_sysdev_class.kset.kobj,
				2206	&perfclass_attr_group);
				2207	}
				2208	device_initcall(perf_counter_sysfs_init);