Blame - kernel/perf_counter.c - android_kernel_htc_msm8960

blob: faf671b29566114f15da4a7cc0bb0aed17ef597d [file] [log] [blame]

Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1	/*
				2	* Performance counter core code
				3	*
				4	* Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
				5	* Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
				6	*
				7	* For licencing details see kernel-base/COPYING
				8	*/
				9
				10	#include <linux/fs.h>
				11	#include <linux/cpu.h>
				12	#include <linux/smp.h>
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	13	#include <linux/file.h>
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	14	#include <linux/poll.h>
				15	#include <linux/sysfs.h>
				16	#include <linux/ptrace.h>
				17	#include <linux/percpu.h>
				18	#include <linux/uaccess.h>
				19	#include <linux/syscalls.h>
				20	#include <linux/anon_inodes.h>
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	21	#include <linux/kernel_stat.h>
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	22	#include <linux/perf_counter.h>
				23
				24	/*
				25	* Each CPU has a list of per CPU counters:
				26	*/
				27	DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
				28
Ingo Molnar	088e285	2008-12-14 20:21:00 +0100	[diff] [blame]	29	int perf_max_counters __read_mostly = 1;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	30	static int perf_reserved_percpu __read_mostly;
				31	static int perf_overcommit __read_mostly = 1;
				32
				33	/*
				34	* Mutex for (sysadmin-configurable) counter reservations:
				35	*/
				36	static DEFINE_MUTEX(perf_resource_mutex);
				37
				38	/*
				39	* Architecture provided APIs - weak aliases:
				40	*/
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	41	extern __weak const struct hw_perf_counter_ops *
Ingo Molnar	621a01e	2008-12-11 12:46:46 +0100	[diff] [blame]	42	hw_perf_counter_init(struct perf_counter *counter)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	43	{
Paul Mackerras	ff6f054	2009-01-09 16:19:25 +1100	[diff] [blame]	44	return NULL;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	45	}
				46
Ingo Molnar	01b2838	2008-12-11 13:45:51 +0100	[diff] [blame]	47	u64 __weak hw_perf_save_disable(void) { return 0; }
Yinghai Lu	01ea1cc	2008-12-26 21:05:06 -0800	[diff] [blame]	48	void __weak hw_perf_restore(u64 ctrl) { barrier(); }
Paul Mackerras	01d0287	2009-01-14 13:44:19 +1100	[diff] [blame]	49	void __weak hw_perf_counter_setup(int cpu) { barrier(); }
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	50	int __weak hw_perf_group_sched_in(struct perf_counter *group_leader,
				51	struct perf_cpu_context *cpuctx,
				52	struct perf_counter_context *ctx, int cpu)
				53	{
				54	return 0;
				55	}
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	56
Paul Mackerras	4eb96fc	2009-01-09 17:24:34 +1100	[diff] [blame]	57	void __weak perf_counter_print_debug(void) { }
				58
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	59	static void
				60	list_add_counter(struct perf_counter counter, struct perf_counter_context ctx)
				61	{
				62	struct perf_counter *group_leader = counter->group_leader;
				63
				64	/*
				65	* Depending on whether it is a standalone or sibling counter,
				66	* add it straight to the context's counter list, or to the group
				67	* leader's sibling list:
				68	*/
				69	if (counter->group_leader == counter)
				70	list_add_tail(&counter->list_entry, &ctx->counter_list);
				71	else
				72	list_add_tail(&counter->list_entry, &group_leader->sibling_list);
				73	}
				74
				75	static void
				76	list_del_counter(struct perf_counter counter, struct perf_counter_context ctx)
				77	{
				78	struct perf_counter sibling, tmp;
				79
				80	list_del_init(&counter->list_entry);
				81
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	82	/*
				83	* If this was a group counter with sibling counters then
				84	* upgrade the siblings to singleton counters by adding them
				85	* to the context list directly:
				86	*/
				87	list_for_each_entry_safe(sibling, tmp,
				88	&counter->sibling_list, list_entry) {
				89
				90	list_del_init(&sibling->list_entry);
				91	list_add_tail(&sibling->list_entry, &ctx->counter_list);
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	92	sibling->group_leader = sibling;
				93	}
				94	}
				95
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	96	static void
				97	counter_sched_out(struct perf_counter *counter,
				98	struct perf_cpu_context *cpuctx,
				99	struct perf_counter_context *ctx)
				100	{
				101	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
				102	return;
				103
				104	counter->state = PERF_COUNTER_STATE_INACTIVE;
				105	counter->hw_ops->disable(counter);
				106	counter->oncpu = -1;
				107
				108	if (!is_software_counter(counter))
				109	cpuctx->active_oncpu--;
				110	ctx->nr_active--;
				111	if (counter->hw_event.exclusive \|\| !cpuctx->active_oncpu)
				112	cpuctx->exclusive = 0;
				113	}
				114
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	115	/*
				116	* Cross CPU call to remove a performance counter
				117	*
				118	* We disable the counter on the hardware level first. After that we
				119	* remove it from the context list.
				120	*/
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	121	static void __perf_counter_remove_from_context(void *info)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	122	{
				123	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
				124	struct perf_counter *counter = info;
				125	struct perf_counter_context *ctx = counter->ctx;
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	126	unsigned long flags;
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	127	u64 perf_flags;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	128
				129	/*
				130	* If this is a task context, we need to check whether it is
				131	* the current task context of this cpu. If not it has been
				132	* scheduled out before the smp call arrived.
				133	*/
				134	if (ctx->task && cpuctx->task_ctx != ctx)
				135	return;
				136
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	137	curr_rq_lock_irq_save(&flags);
				138	spin_lock(&ctx->lock);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	139
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	140	counter_sched_out(counter, cpuctx, ctx);
				141
				142	counter->task = NULL;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	143	ctx->nr_counters--;
				144
				145	/*
				146	* Protect the list operation against NMI by disabling the
				147	* counters on a global level. NOP for non NMI based counters.
				148	*/
Ingo Molnar	01b2838	2008-12-11 13:45:51 +0100	[diff] [blame]	149	perf_flags = hw_perf_save_disable();
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	150	list_del_counter(counter, ctx);
Ingo Molnar	01b2838	2008-12-11 13:45:51 +0100	[diff] [blame]	151	hw_perf_restore(perf_flags);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	152
				153	if (!ctx->task) {
				154	/*
				155	* Allow more per task counters with respect to the
				156	* reservation:
				157	*/
				158	cpuctx->max_pertask =
				159	min(perf_max_counters - ctx->nr_counters,
				160	perf_max_counters - perf_reserved_percpu);
				161	}
				162
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	163	spin_unlock(&ctx->lock);
				164	curr_rq_unlock_irq_restore(&flags);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	165	}
				166
				167
				168	/*
				169	* Remove the counter from a task's (or a CPU's) list of counters.
				170	*
				171	* Must be called with counter->mutex held.
				172	*
				173	* CPU counters are removed with a smp call. For task counters we only
				174	* call when the task is on a CPU.
				175	*/
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	176	static void perf_counter_remove_from_context(struct perf_counter *counter)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	177	{
				178	struct perf_counter_context *ctx = counter->ctx;
				179	struct task_struct *task = ctx->task;
				180
				181	if (!task) {
				182	/*
				183	* Per cpu counters are removed via an smp call and
				184	* the removal is always sucessful.
				185	*/
				186	smp_call_function_single(counter->cpu,
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	187	__perf_counter_remove_from_context,
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	188	counter, 1);
				189	return;
				190	}
				191
				192	retry:
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	193	task_oncpu_function_call(task, __perf_counter_remove_from_context,
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	194	counter);
				195
				196	spin_lock_irq(&ctx->lock);
				197	/*
				198	* If the context is active we need to retry the smp call.
				199	*/
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	200	if (ctx->nr_active && !list_empty(&counter->list_entry)) {
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	201	spin_unlock_irq(&ctx->lock);
				202	goto retry;
				203	}
				204
				205	/*
				206	* The lock prevents that this context is scheduled in so we
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	207	* can remove the counter safely, if the call above did not
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	208	* succeed.
				209	*/
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	210	if (!list_empty(&counter->list_entry)) {
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	211	ctx->nr_counters--;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	212	list_del_counter(counter, ctx);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	213	counter->task = NULL;
				214	}
				215	spin_unlock_irq(&ctx->lock);
				216	}
				217
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	218	static int
				219	counter_sched_in(struct perf_counter *counter,
				220	struct perf_cpu_context *cpuctx,
				221	struct perf_counter_context *ctx,
				222	int cpu)
				223	{
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	224	if (counter->state <= PERF_COUNTER_STATE_OFF)
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	225	return 0;
				226
				227	counter->state = PERF_COUNTER_STATE_ACTIVE;
				228	counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */
				229	/*
				230	* The new state must be visible before we turn it on in the hardware:
				231	*/
				232	smp_wmb();
				233
				234	if (counter->hw_ops->enable(counter)) {
				235	counter->state = PERF_COUNTER_STATE_INACTIVE;
				236	counter->oncpu = -1;
				237	return -EAGAIN;
				238	}
				239
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	240	if (!is_software_counter(counter))
				241	cpuctx->active_oncpu++;
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	242	ctx->nr_active++;
				243
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	244	if (counter->hw_event.exclusive)
				245	cpuctx->exclusive = 1;
				246
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	247	return 0;
				248	}
				249
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	250	/*
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	251	* Return 1 for a group consisting entirely of software counters,
				252	* 0 if the group contains any hardware counters.
				253	*/
				254	static int is_software_only_group(struct perf_counter *leader)
				255	{
				256	struct perf_counter *counter;
				257
				258	if (!is_software_counter(leader))
				259	return 0;
				260	list_for_each_entry(counter, &leader->sibling_list, list_entry)
				261	if (!is_software_counter(counter))
				262	return 0;
				263	return 1;
				264	}
				265
				266	/*
				267	* Work out whether we can put this counter group on the CPU now.
				268	*/
				269	static int group_can_go_on(struct perf_counter *counter,
				270	struct perf_cpu_context *cpuctx,
				271	int can_add_hw)
				272	{
				273	/*
				274	* Groups consisting entirely of software counters can always go on.
				275	*/
				276	if (is_software_only_group(counter))
				277	return 1;
				278	/*
				279	* If an exclusive group is already on, no other hardware
				280	* counters can go on.
				281	*/
				282	if (cpuctx->exclusive)
				283	return 0;
				284	/*
				285	* If this group is exclusive and there are already
				286	* counters on the CPU, it can't go on.
				287	*/
				288	if (counter->hw_event.exclusive && cpuctx->active_oncpu)
				289	return 0;
				290	/*
				291	* Otherwise, try to add it if all previous groups were able
				292	* to go on.
				293	*/
				294	return can_add_hw;
				295	}
				296
				297	/*
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	298	* Cross CPU call to install and enable a performance counter
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	299	*/
				300	static void __perf_install_in_context(void *info)
				301	{
				302	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
				303	struct perf_counter *counter = info;
				304	struct perf_counter_context *ctx = counter->ctx;
				305	int cpu = smp_processor_id();
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	306	unsigned long flags;
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	307	u64 perf_flags;
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	308	int err;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	309
				310	/*
				311	* If this is a task context, we need to check whether it is
				312	* the current task context of this cpu. If not it has been
				313	* scheduled out before the smp call arrived.
				314	*/
				315	if (ctx->task && cpuctx->task_ctx != ctx)
				316	return;
				317
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	318	curr_rq_lock_irq_save(&flags);
				319	spin_lock(&ctx->lock);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	320
				321	/*
				322	* Protect the list operation against NMI by disabling the
				323	* counters on a global level. NOP for non NMI based counters.
				324	*/
Ingo Molnar	01b2838	2008-12-11 13:45:51 +0100	[diff] [blame]	325	perf_flags = hw_perf_save_disable();
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	326
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	327	list_add_counter(counter, ctx);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	328	ctx->nr_counters++;
				329
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	330	/*
				331	* An exclusive counter can't go on if there are already active
				332	* hardware counters, and no hardware counter can go on if there
				333	* is already an exclusive counter on.
				334	*/
				335	if (counter->state == PERF_COUNTER_STATE_INACTIVE &&
				336	!group_can_go_on(counter, cpuctx, 1))
				337	err = -EEXIST;
				338	else
				339	err = counter_sched_in(counter, cpuctx, ctx, cpu);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	340
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	341	if (err && counter->hw_event.pinned)
				342	counter->state = PERF_COUNTER_STATE_ERROR;
				343
				344	if (!err && !ctx->task && cpuctx->max_pertask)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	345	cpuctx->max_pertask--;
				346
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	347	hw_perf_restore(perf_flags);
				348
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	349	spin_unlock(&ctx->lock);
				350	curr_rq_unlock_irq_restore(&flags);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	351	}
				352
				353	/*
				354	* Attach a performance counter to a context
				355	*
				356	* First we add the counter to the list with the hardware enable bit
				357	* in counter->hw_config cleared.
				358	*
				359	* If the counter is attached to a task which is on a CPU we use a smp
				360	* call to enable it in the task context. The task might have been
				361	* scheduled away, but we check this in the smp call again.
				362	*/
				363	static void
				364	perf_install_in_context(struct perf_counter_context *ctx,
				365	struct perf_counter *counter,
				366	int cpu)
				367	{
				368	struct task_struct *task = ctx->task;
				369
				370	counter->ctx = ctx;
				371	if (!task) {
				372	/*
				373	* Per cpu counters are installed via an smp call and
				374	* the install is always sucessful.
				375	*/
				376	smp_call_function_single(cpu, __perf_install_in_context,
				377	counter, 1);
				378	return;
				379	}
				380
				381	counter->task = task;
				382	retry:
				383	task_oncpu_function_call(task, __perf_install_in_context,
				384	counter);
				385
				386	spin_lock_irq(&ctx->lock);
				387	/*
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	388	* we need to retry the smp call.
				389	*/
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	390	if (ctx->nr_active && list_empty(&counter->list_entry)) {
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	391	spin_unlock_irq(&ctx->lock);
				392	goto retry;
				393	}
				394
				395	/*
				396	* The lock prevents that this context is scheduled in so we
				397	* can add the counter safely, if it the call above did not
				398	* succeed.
				399	*/
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	400	if (list_empty(&counter->list_entry)) {
				401	list_add_counter(counter, ctx);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	402	ctx->nr_counters++;
				403	}
				404	spin_unlock_irq(&ctx->lock);
				405	}
				406
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	407	static void
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	408	group_sched_out(struct perf_counter *group_counter,
				409	struct perf_cpu_context *cpuctx,
				410	struct perf_counter_context *ctx)
				411	{
				412	struct perf_counter *counter;
				413
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	414	if (group_counter->state != PERF_COUNTER_STATE_ACTIVE)
				415	return;
				416
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	417	counter_sched_out(group_counter, cpuctx, ctx);
				418
				419	/*
				420	* Schedule out siblings (if any):
				421	*/
				422	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
				423	counter_sched_out(counter, cpuctx, ctx);
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	424
				425	if (group_counter->hw_event.exclusive)
				426	cpuctx->exclusive = 0;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	427	}
				428
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	429	void __perf_counter_sched_out(struct perf_counter_context *ctx,
				430	struct perf_cpu_context *cpuctx)
				431	{
				432	struct perf_counter *counter;
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	433	u64 flags;
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	434
				435	if (likely(!ctx->nr_counters))
				436	return;
				437
				438	spin_lock(&ctx->lock);
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	439	flags = hw_perf_save_disable();
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	440	if (ctx->nr_active) {
				441	list_for_each_entry(counter, &ctx->counter_list, list_entry)
				442	group_sched_out(counter, cpuctx, ctx);
				443	}
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	444	hw_perf_restore(flags);
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	445	spin_unlock(&ctx->lock);
				446	}
				447
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	448	/*
				449	* Called from scheduler to remove the counters of the current task,
				450	* with interrupts disabled.
				451	*
				452	* We stop each counter and update the counter value in counter->count.
				453	*
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	454	* This does not protect us against NMI, but disable()
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	455	* sets the disabled bit in the control field of counter _before_
				456	* accessing the counter control register. If a NMI hits, then it will
				457	* not restart the counter.
				458	*/
				459	void perf_counter_task_sched_out(struct task_struct *task, int cpu)
				460	{
				461	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
				462	struct perf_counter_context *ctx = &task->perf_counter_ctx;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	463
				464	if (likely(!cpuctx->task_ctx))
				465	return;
				466
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	467	__perf_counter_sched_out(ctx, cpuctx);
				468
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	469	cpuctx->task_ctx = NULL;
				470	}
				471
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	472	static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	473	{
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	474	__perf_counter_sched_out(&cpuctx->ctx, cpuctx);
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	475	}
				476
Ingo Molnar	7995888	2008-12-17 08:54:56 +0100	[diff] [blame]	477	static int
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	478	group_sched_in(struct perf_counter *group_counter,
				479	struct perf_cpu_context *cpuctx,
				480	struct perf_counter_context *ctx,
				481	int cpu)
				482	{
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	483	struct perf_counter counter, partial_group;
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	484	int ret;
				485
				486	if (group_counter->state == PERF_COUNTER_STATE_OFF)
				487	return 0;
				488
				489	ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu);
				490	if (ret)
				491	return ret < 0 ? ret : 0;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	492
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	493	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
				494	return -EAGAIN;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	495
				496	/*
				497	* Schedule in siblings as one group (if any):
				498	*/
Ingo Molnar	7995888	2008-12-17 08:54:56 +0100	[diff] [blame]	499	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	500	if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
				501	partial_group = counter;
				502	goto group_error;
				503	}
Ingo Molnar	7995888	2008-12-17 08:54:56 +0100	[diff] [blame]	504	}
				505
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	506	return 0;
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	507
				508	group_error:
				509	/*
				510	* Groups can be scheduled in as one unit only, so undo any
				511	* partial group before returning:
				512	*/
				513	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
				514	if (counter == partial_group)
				515	break;
				516	counter_sched_out(counter, cpuctx, ctx);
				517	}
				518	counter_sched_out(group_counter, cpuctx, ctx);
				519
				520	return -EAGAIN;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	521	}
				522
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	523	static void
				524	__perf_counter_sched_in(struct perf_counter_context *ctx,
				525	struct perf_cpu_context *cpuctx, int cpu)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	526	{
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	527	struct perf_counter *counter;
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	528	u64 flags;
Paul Mackerras	dd0e6ba	2009-01-12 15:11:00 +1100	[diff] [blame]	529	int can_add_hw = 1;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	530
				531	if (likely(!ctx->nr_counters))
				532	return;
				533
				534	spin_lock(&ctx->lock);
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	535	flags = hw_perf_save_disable();
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	536
				537	/*
				538	* First go through the list and put on any pinned groups
				539	* in order to give them the best chance of going on.
				540	*/
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	541	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	542	if (counter->state <= PERF_COUNTER_STATE_OFF \|\|
				543	!counter->hw_event.pinned)
				544	continue;
				545	if (counter->cpu != -1 && counter->cpu != cpu)
				546	continue;
				547
				548	if (group_can_go_on(counter, cpuctx, 1))
				549	group_sched_in(counter, cpuctx, ctx, cpu);
				550
				551	/*
				552	* If this pinned group hasn't been scheduled,
				553	* put it in error state.
				554	*/
				555	if (counter->state == PERF_COUNTER_STATE_INACTIVE)
				556	counter->state = PERF_COUNTER_STATE_ERROR;
				557	}
				558
				559	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
				560	/*
				561	* Ignore counters in OFF or ERROR state, and
				562	* ignore pinned counters since we did them already.
				563	*/
				564	if (counter->state <= PERF_COUNTER_STATE_OFF \|\|
				565	counter->hw_event.pinned)
				566	continue;
				567
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	568	/*
				569	* Listen to the 'cpu' scheduling filter constraint
				570	* of counters:
				571	*/
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	572	if (counter->cpu != -1 && counter->cpu != cpu)
				573	continue;
				574
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	575	if (group_can_go_on(counter, cpuctx, can_add_hw)) {
Paul Mackerras	dd0e6ba	2009-01-12 15:11:00 +1100	[diff] [blame]	576	if (group_sched_in(counter, cpuctx, ctx, cpu))
				577	can_add_hw = 0;
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	578	}
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	579	}
Paul Mackerras	3cbed42	2009-01-09 16:43:42 +1100	[diff] [blame]	580	hw_perf_restore(flags);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	581	spin_unlock(&ctx->lock);
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	582	}
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	583
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	584	/*
				585	* Called from scheduler to add the counters of the current task
				586	* with interrupts disabled.
				587	*
				588	* We restore the counter value and then enable it.
				589	*
				590	* This does not protect us against NMI, but enable()
				591	* sets the enabled bit in the control field of counter _before_
				592	* accessing the counter control register. If a NMI hits, then it will
				593	* keep the counter running.
				594	*/
				595	void perf_counter_task_sched_in(struct task_struct *task, int cpu)
				596	{
				597	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
				598	struct perf_counter_context *ctx = &task->perf_counter_ctx;
				599
				600	__perf_counter_sched_in(ctx, cpuctx, cpu);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	601	cpuctx->task_ctx = ctx;
				602	}
				603
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	604	static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
				605	{
				606	struct perf_counter_context *ctx = &cpuctx->ctx;
				607
				608	__perf_counter_sched_in(ctx, cpuctx, cpu);
				609	}
				610
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	611	int perf_counter_task_disable(void)
				612	{
				613	struct task_struct *curr = current;
				614	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
				615	struct perf_counter *counter;
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	616	unsigned long flags;
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	617	u64 perf_flags;
				618	int cpu;
				619
				620	if (likely(!ctx->nr_counters))
				621	return 0;
				622
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	623	curr_rq_lock_irq_save(&flags);
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	624	cpu = smp_processor_id();
				625
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	626	/* force the update of the task clock: */
				627	__task_delta_exec(curr, 1);
				628
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	629	perf_counter_task_sched_out(curr, cpu);
				630
				631	spin_lock(&ctx->lock);
				632
				633	/*
				634	* Disable all the counters:
				635	*/
				636	perf_flags = hw_perf_save_disable();
				637
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	638	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
				639	if (counter->state != PERF_COUNTER_STATE_ERROR)
				640	counter->state = PERF_COUNTER_STATE_OFF;
				641	}
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	642
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	643	hw_perf_restore(perf_flags);
				644
				645	spin_unlock(&ctx->lock);
				646
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	647	curr_rq_unlock_irq_restore(&flags);
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	648
				649	return 0;
				650	}
				651
				652	int perf_counter_task_enable(void)
				653	{
				654	struct task_struct *curr = current;
				655	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
				656	struct perf_counter *counter;
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	657	unsigned long flags;
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	658	u64 perf_flags;
				659	int cpu;
				660
				661	if (likely(!ctx->nr_counters))
				662	return 0;
				663
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	664	curr_rq_lock_irq_save(&flags);
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	665	cpu = smp_processor_id();
				666
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	667	/* force the update of the task clock: */
				668	__task_delta_exec(curr, 1);
				669
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	670	perf_counter_task_sched_out(curr, cpu);
				671
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	672	spin_lock(&ctx->lock);
				673
				674	/*
				675	* Disable all the counters:
				676	*/
				677	perf_flags = hw_perf_save_disable();
				678
				679	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	680	if (counter->state > PERF_COUNTER_STATE_OFF)
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	681	continue;
Ingo Molnar	6a93070	2008-12-11 15:17:03 +0100	[diff] [blame]	682	counter->state = PERF_COUNTER_STATE_INACTIVE;
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	683	counter->hw_event.disabled = 0;
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	684	}
				685	hw_perf_restore(perf_flags);
				686
				687	spin_unlock(&ctx->lock);
				688
				689	perf_counter_task_sched_in(curr, cpu);
				690
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	691	curr_rq_unlock_irq_restore(&flags);
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	692
				693	return 0;
				694	}
				695
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	696	/*
				697	* Round-robin a context's counters:
				698	*/
				699	static void rotate_ctx(struct perf_counter_context *ctx)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	700	{
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	701	struct perf_counter *counter;
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	702	u64 perf_flags;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	703
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	704	if (!ctx->nr_counters)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	705	return;
				706
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	707	spin_lock(&ctx->lock);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	708	/*
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	709	* Rotate the first entry last (works just fine for group counters too):
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	710	*/
Ingo Molnar	01b2838	2008-12-11 13:45:51 +0100	[diff] [blame]	711	perf_flags = hw_perf_save_disable();
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	712	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
				713	list_del(&counter->list_entry);
				714	list_add_tail(&counter->list_entry, &ctx->counter_list);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	715	break;
				716	}
Ingo Molnar	01b2838	2008-12-11 13:45:51 +0100	[diff] [blame]	717	hw_perf_restore(perf_flags);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	718
				719	spin_unlock(&ctx->lock);
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	720	}
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	721
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	722	void perf_counter_task_tick(struct task_struct *curr, int cpu)
				723	{
				724	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
				725	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
				726	const int rotate_percpu = 0;
				727
				728	if (rotate_percpu)
				729	perf_counter_cpu_sched_out(cpuctx);
				730	perf_counter_task_sched_out(curr, cpu);
				731
				732	if (rotate_percpu)
				733	rotate_ctx(&cpuctx->ctx);
				734	rotate_ctx(ctx);
				735
				736	if (rotate_percpu)
				737	perf_counter_cpu_sched_in(cpuctx, cpu);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	738	perf_counter_task_sched_in(curr, cpu);
				739	}
				740
				741	/*
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	742	* Cross CPU call to read the hardware counter
				743	*/
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	744	static void __read(void *info)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	745	{
Ingo Molnar	621a01e	2008-12-11 12:46:46 +0100	[diff] [blame]	746	struct perf_counter *counter = info;
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	747	unsigned long flags;
Ingo Molnar	621a01e	2008-12-11 12:46:46 +0100	[diff] [blame]	748
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	749	curr_rq_lock_irq_save(&flags);
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	750	counter->hw_ops->read(counter);
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	751	curr_rq_unlock_irq_restore(&flags);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	752	}
				753
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	754	static u64 perf_counter_read(struct perf_counter *counter)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	755	{
				756	/*
				757	* If counter is enabled and currently active on a CPU, update the
				758	* value in the counter structure:
				759	*/
Ingo Molnar	6a93070	2008-12-11 15:17:03 +0100	[diff] [blame]	760	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	761	smp_call_function_single(counter->oncpu,
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	762	__read, counter, 1);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	763	}
				764
Ingo Molnar	ee06094	2008-12-13 09:00:03 +0100	[diff] [blame]	765	return atomic64_read(&counter->count);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	766	}
				767
				768	/*
				769	* Cross CPU call to switch performance data pointers
				770	*/
				771	static void __perf_switch_irq_data(void *info)
				772	{
				773	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
				774	struct perf_counter *counter = info;
				775	struct perf_counter_context *ctx = counter->ctx;
				776	struct perf_data *oldirqdata = counter->irqdata;
				777
				778	/*
				779	* If this is a task context, we need to check whether it is
				780	* the current task context of this cpu. If not it has been
				781	* scheduled out before the smp call arrived.
				782	*/
				783	if (ctx->task) {
				784	if (cpuctx->task_ctx != ctx)
				785	return;
				786	spin_lock(&ctx->lock);
				787	}
				788
				789	/* Change the pointer NMI safe */
				790	atomic_long_set((atomic_long_t *)&counter->irqdata,
				791	(unsigned long) counter->usrdata);
				792	counter->usrdata = oldirqdata;
				793
				794	if (ctx->task)
				795	spin_unlock(&ctx->lock);
				796	}
				797
				798	static struct perf_data perf_switch_irq_data(struct perf_counter counter)
				799	{
				800	struct perf_counter_context *ctx = counter->ctx;
				801	struct perf_data *oldirqdata = counter->irqdata;
				802	struct task_struct *task = ctx->task;
				803
				804	if (!task) {
				805	smp_call_function_single(counter->cpu,
				806	__perf_switch_irq_data,
				807	counter, 1);
				808	return counter->usrdata;
				809	}
				810
				811	retry:
				812	spin_lock_irq(&ctx->lock);
Ingo Molnar	6a93070	2008-12-11 15:17:03 +0100	[diff] [blame]	813	if (counter->state != PERF_COUNTER_STATE_ACTIVE) {
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	814	counter->irqdata = counter->usrdata;
				815	counter->usrdata = oldirqdata;
				816	spin_unlock_irq(&ctx->lock);
				817	return oldirqdata;
				818	}
				819	spin_unlock_irq(&ctx->lock);
				820	task_oncpu_function_call(task, __perf_switch_irq_data, counter);
				821	/* Might have failed, because task was scheduled out */
				822	if (counter->irqdata == oldirqdata)
				823	goto retry;
				824
				825	return counter->usrdata;
				826	}
				827
				828	static void put_context(struct perf_counter_context *ctx)
				829	{
				830	if (ctx->task)
				831	put_task_struct(ctx->task);
				832	}
				833
				834	static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
				835	{
				836	struct perf_cpu_context *cpuctx;
				837	struct perf_counter_context *ctx;
				838	struct task_struct *task;
				839
				840	/*
				841	* If cpu is not a wildcard then this is a percpu counter:
				842	*/
				843	if (cpu != -1) {
				844	/* Must be root to operate on a CPU counter: */
				845	if (!capable(CAP_SYS_ADMIN))
				846	return ERR_PTR(-EACCES);
				847
				848	if (cpu < 0 \|\| cpu > num_possible_cpus())
				849	return ERR_PTR(-EINVAL);
				850
				851	/*
				852	* We could be clever and allow to attach a counter to an
				853	* offline CPU and activate it when the CPU comes up, but
				854	* that's for later.
				855	*/
				856	if (!cpu_isset(cpu, cpu_online_map))
				857	return ERR_PTR(-ENODEV);
				858
				859	cpuctx = &per_cpu(perf_cpu_context, cpu);
				860	ctx = &cpuctx->ctx;
				861
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	862	return ctx;
				863	}
				864
				865	rcu_read_lock();
				866	if (!pid)
				867	task = current;
				868	else
				869	task = find_task_by_vpid(pid);
				870	if (task)
				871	get_task_struct(task);
				872	rcu_read_unlock();
				873
				874	if (!task)
				875	return ERR_PTR(-ESRCH);
				876
				877	ctx = &task->perf_counter_ctx;
				878	ctx->task = task;
				879
				880	/* Reuse ptrace permission checks for now. */
				881	if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
				882	put_context(ctx);
				883	return ERR_PTR(-EACCES);
				884	}
				885
				886	return ctx;
				887	}
				888
				889	/*
				890	* Called when the last reference to the file is gone.
				891	*/
				892	static int perf_release(struct inode inode, struct file file)
				893	{
				894	struct perf_counter *counter = file->private_data;
				895	struct perf_counter_context *ctx = counter->ctx;
				896
				897	file->private_data = NULL;
				898
				899	mutex_lock(&counter->mutex);
				900
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	901	perf_counter_remove_from_context(counter);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	902	put_context(ctx);
				903
				904	mutex_unlock(&counter->mutex);
				905
				906	kfree(counter);
				907
				908	return 0;
				909	}
				910
				911	/*
				912	* Read the performance counter - simple non blocking version for now
				913	*/
				914	static ssize_t
				915	perf_read_hw(struct perf_counter counter, char __user buf, size_t count)
				916	{
				917	u64 cntval;
				918
				919	if (count != sizeof(cntval))
				920	return -EINVAL;
				921
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	922	/*
				923	* Return end-of-file for a read on a counter that is in
				924	* error state (i.e. because it was pinned but it couldn't be
				925	* scheduled on to the CPU at some point).
				926	*/
				927	if (counter->state == PERF_COUNTER_STATE_ERROR)
				928	return 0;
				929
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	930	mutex_lock(&counter->mutex);
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	931	cntval = perf_counter_read(counter);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	932	mutex_unlock(&counter->mutex);
				933
				934	return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval);
				935	}
				936
				937	static ssize_t
				938	perf_copy_usrdata(struct perf_data usrdata, char __user buf, size_t count)
				939	{
				940	if (!usrdata->len)
				941	return 0;
				942
				943	count = min(count, (size_t)usrdata->len);
				944	if (copy_to_user(buf, usrdata->data + usrdata->rd_idx, count))
				945	return -EFAULT;
				946
				947	/* Adjust the counters */
				948	usrdata->len -= count;
				949	if (!usrdata->len)
				950	usrdata->rd_idx = 0;
				951	else
				952	usrdata->rd_idx += count;
				953
				954	return count;
				955	}
				956
				957	static ssize_t
				958	perf_read_irq_data(struct perf_counter *counter,
				959	char __user *buf,
				960	size_t count,
				961	int nonblocking)
				962	{
				963	struct perf_data irqdata, usrdata;
				964	DECLARE_WAITQUEUE(wait, current);
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	965	ssize_t res, res2;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	966
				967	irqdata = counter->irqdata;
				968	usrdata = counter->usrdata;
				969
				970	if (usrdata->len + irqdata->len >= count)
				971	goto read_pending;
				972
				973	if (nonblocking)
				974	return -EAGAIN;
				975
				976	spin_lock_irq(&counter->waitq.lock);
				977	__add_wait_queue(&counter->waitq, &wait);
				978	for (;;) {
				979	set_current_state(TASK_INTERRUPTIBLE);
				980	if (usrdata->len + irqdata->len >= count)
				981	break;
				982
				983	if (signal_pending(current))
				984	break;
				985
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	986	if (counter->state == PERF_COUNTER_STATE_ERROR)
				987	break;
				988
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	989	spin_unlock_irq(&counter->waitq.lock);
				990	schedule();
				991	spin_lock_irq(&counter->waitq.lock);
				992	}
				993	__remove_wait_queue(&counter->waitq, &wait);
				994	__set_current_state(TASK_RUNNING);
				995	spin_unlock_irq(&counter->waitq.lock);
				996
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	997	if (usrdata->len + irqdata->len < count &&
				998	counter->state != PERF_COUNTER_STATE_ERROR)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	999	return -ERESTARTSYS;
				1000	read_pending:
				1001	mutex_lock(&counter->mutex);
				1002
				1003	/* Drain pending data first: */
				1004	res = perf_copy_usrdata(usrdata, buf, count);
				1005	if (res < 0 \|\| res == count)
				1006	goto out;
				1007
				1008	/* Switch irq buffer: */
				1009	usrdata = perf_switch_irq_data(counter);
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	1010	res2 = perf_copy_usrdata(usrdata, buf + res, count - res);
				1011	if (res2 < 0) {
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1012	if (!res)
				1013	res = -EFAULT;
				1014	} else {
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	1015	res += res2;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1016	}
				1017	out:
				1018	mutex_unlock(&counter->mutex);
				1019
				1020	return res;
				1021	}
				1022
				1023	static ssize_t
				1024	perf_read(struct file file, char __user buf, size_t count, loff_t *ppos)
				1025	{
				1026	struct perf_counter *counter = file->private_data;
				1027
Ingo Molnar	9f66a38	2008-12-10 12:33:23 +0100	[diff] [blame]	1028	switch (counter->hw_event.record_type) {
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1029	case PERF_RECORD_SIMPLE:
				1030	return perf_read_hw(counter, buf, count);
				1031
				1032	case PERF_RECORD_IRQ:
				1033	case PERF_RECORD_GROUP:
				1034	return perf_read_irq_data(counter, buf, count,
				1035	file->f_flags & O_NONBLOCK);
				1036	}
				1037	return -EINVAL;
				1038	}
				1039
				1040	static unsigned int perf_poll(struct file file, poll_table wait)
				1041	{
				1042	struct perf_counter *counter = file->private_data;
				1043	unsigned int events = 0;
				1044	unsigned long flags;
				1045
				1046	poll_wait(file, &counter->waitq, wait);
				1047
				1048	spin_lock_irqsave(&counter->waitq.lock, flags);
				1049	if (counter->usrdata->len \|\| counter->irqdata->len)
				1050	events \|= POLLIN;
				1051	spin_unlock_irqrestore(&counter->waitq.lock, flags);
				1052
				1053	return events;
				1054	}
				1055
				1056	static const struct file_operations perf_fops = {
				1057	.release = perf_release,
				1058	.read = perf_read,
				1059	.poll = perf_poll,
				1060	};
				1061
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	1062	static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1063	{
Paul Mackerras	9abf8a0	2009-01-09 16:26:43 +1100	[diff] [blame]	1064	int cpu = raw_smp_processor_id();
				1065
				1066	atomic64_set(&counter->hw.prev_count, cpu_clock(cpu));
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	1067	return 0;
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1068	}
				1069
Paul Mackerras	9abf8a0	2009-01-09 16:26:43 +1100	[diff] [blame]	1070	static void cpu_clock_perf_counter_update(struct perf_counter *counter)
				1071	{
				1072	int cpu = raw_smp_processor_id();
				1073	s64 prev;
				1074	u64 now;
				1075
				1076	now = cpu_clock(cpu);
				1077	prev = atomic64_read(&counter->hw.prev_count);
				1078	atomic64_set(&counter->hw.prev_count, now);
				1079	atomic64_add(now - prev, &counter->count);
				1080	}
				1081
/*
 * Disable a cpu-clock counter: account the time elapsed since the last
 * update.
 */
static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
{
	cpu_clock_perf_counter_update(counter);
}
				1086
				1087	static void cpu_clock_perf_counter_read(struct perf_counter *counter)
				1088	{
Paul Mackerras	9abf8a0	2009-01-09 16:26:43 +1100	[diff] [blame]	1089	cpu_clock_perf_counter_update(counter);
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1090	}
				1091
				1092	static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	1093	.enable = cpu_clock_perf_counter_enable,
				1094	.disable = cpu_clock_perf_counter_disable,
				1095	.read = cpu_clock_perf_counter_read,
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1096	};
				1097
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	1098	/*
				1099	* Called from within the scheduler:
				1100	*/
				1101	static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
Ingo Molnar	bae43c9	2008-12-11 14:03:20 +0100	[diff] [blame]	1102	{
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	1103	struct task_struct *curr = counter->task;
				1104	u64 delta;
				1105
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	1106	delta = __task_delta_exec(curr, update);
				1107
				1108	return curr->se.sum_exec_runtime + delta;
				1109	}
				1110
				1111	static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now)
				1112	{
				1113	u64 prev;
Ingo Molnar	8cb391e	2008-12-14 12:22:31 +0100	[diff] [blame]	1114	s64 delta;
Ingo Molnar	bae43c9	2008-12-11 14:03:20 +0100	[diff] [blame]	1115
Ingo Molnar	8cb391e	2008-12-14 12:22:31 +0100	[diff] [blame]	1116	prev = atomic64_read(&counter->hw.prev_count);
Ingo Molnar	8cb391e	2008-12-14 12:22:31 +0100	[diff] [blame]	1117
				1118	atomic64_set(&counter->hw.prev_count, now);
				1119
				1120	delta = now - prev;
Ingo Molnar	8cb391e	2008-12-14 12:22:31 +0100	[diff] [blame]	1121
				1122	atomic64_add(delta, &counter->count);
Ingo Molnar	bae43c9	2008-12-11 14:03:20 +0100	[diff] [blame]	1123	}
				1124
				1125	static void task_clock_perf_counter_read(struct perf_counter *counter)
				1126	{
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	1127	u64 now = task_clock_perf_counter_val(counter, 1);
				1128
				1129	task_clock_perf_counter_update(counter, now);
Ingo Molnar	8cb391e	2008-12-14 12:22:31 +0100	[diff] [blame]	1130	}
				1131
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	1132	static int task_clock_perf_counter_enable(struct perf_counter *counter)
Ingo Molnar	8cb391e	2008-12-14 12:22:31 +0100	[diff] [blame]	1133	{
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	1134	u64 now = task_clock_perf_counter_val(counter, 0);
				1135
				1136	atomic64_set(&counter->hw.prev_count, now);
Ingo Molnar	95cdd2e	2008-12-21 13:50:42 +0100	[diff] [blame]	1137
				1138	return 0;
Ingo Molnar	8cb391e	2008-12-14 12:22:31 +0100	[diff] [blame]	1139	}
				1140
				1141	static void task_clock_perf_counter_disable(struct perf_counter *counter)
				1142	{
Ingo Molnar	aa9c4c0	2008-12-17 14:10:57 +0100	[diff] [blame]	1143	u64 now = task_clock_perf_counter_val(counter, 0);
				1144
				1145	task_clock_perf_counter_update(counter, now);
Ingo Molnar	bae43c9	2008-12-11 14:03:20 +0100	[diff] [blame]	1146	}
				1147
				1148	static const struct hw_perf_counter_ops perf_ops_task_clock = {
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	1149	.enable = task_clock_perf_counter_enable,
				1150	.disable = task_clock_perf_counter_disable,
				1151	.read = task_clock_perf_counter_read,
Ingo Molnar	bae43c9	2008-12-11 14:03:20 +0100	[diff] [blame]	1152	};
				1153
Ingo Molnar	e06c61a	2008-12-14 14:44:31 +0100	[diff] [blame]	1154	static u64 get_page_faults(void)
				1155	{
				1156	struct task_struct *curr = current;
				1157
				1158	return curr->maj_flt + curr->min_flt;
				1159	}
				1160
				1161	static void page_faults_perf_counter_update(struct perf_counter *counter)
				1162	{
				1163	u64 prev, now;
				1164	s64 delta;
				1165
				1166	prev = atomic64_read(&counter->hw.prev_count);
				1167	now = get_page_faults();
				1168
				1169	atomic64_set(&counter->hw.prev_count, now);
				1170
				1171	delta = now - prev;
Ingo Molnar	e06c61a	2008-12-14 14:44:31 +0100	[diff] [blame]	1172
				1173	atomic64_add(delta, &counter->count);
				1174	}
				1175
				1176	static void page_faults_perf_counter_read(struct perf_counter *counter)
				1177	{
				1178	page_faults_perf_counter_update(counter);
				1179	}
				1180
/*
 * Enable a page-fault counter.  Nothing to set up: page-faults is a
 * per-task value already, so it does not have to be cleared on
 * switch-in.  Always returns 0.
 */
static int page_faults_perf_counter_enable(struct perf_counter *counter)
{
	return 0;
}
				1190
				1191	static void page_faults_perf_counter_disable(struct perf_counter *counter)
				1192	{
				1193	page_faults_perf_counter_update(counter);
				1194	}
				1195
				1196	static const struct hw_perf_counter_ops perf_ops_page_faults = {
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	1197	.enable = page_faults_perf_counter_enable,
				1198	.disable = page_faults_perf_counter_disable,
				1199	.read = page_faults_perf_counter_read,
Ingo Molnar	e06c61a	2008-12-14 14:44:31 +0100	[diff] [blame]	1200	};
				1201
Ingo Molnar	5d6a27d	2008-12-14 12:28:33 +0100	[diff] [blame]	1202	static u64 get_context_switches(void)
				1203	{
				1204	struct task_struct *curr = current;
				1205
				1206	return curr->nvcsw + curr->nivcsw;
				1207	}
				1208
				1209	static void context_switches_perf_counter_update(struct perf_counter *counter)
				1210	{
				1211	u64 prev, now;
				1212	s64 delta;
				1213
				1214	prev = atomic64_read(&counter->hw.prev_count);
				1215	now = get_context_switches();
				1216
				1217	atomic64_set(&counter->hw.prev_count, now);
				1218
				1219	delta = now - prev;
Ingo Molnar	5d6a27d	2008-12-14 12:28:33 +0100	[diff] [blame]	1220
				1221	atomic64_add(delta, &counter->count);
				1222	}
				1223
				1224	static void context_switches_perf_counter_read(struct perf_counter *counter)
				1225	{
				1226	context_switches_perf_counter_update(counter);
				1227	}
				1228
/*
 * Enable a context-switch counter.  Nothing to set up: nvcsw + nivcsw
 * is a per-task value already, so it does not have to be cleared on
 * switch-in.  Always returns 0.
 */
static int context_switches_perf_counter_enable(struct perf_counter *counter)
{
	return 0;
}
				1238
				1239	static void context_switches_perf_counter_disable(struct perf_counter *counter)
				1240	{
				1241	context_switches_perf_counter_update(counter);
				1242	}
				1243
				1244	static const struct hw_perf_counter_ops perf_ops_context_switches = {
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	1245	.enable = context_switches_perf_counter_enable,
				1246	.disable = context_switches_perf_counter_disable,
				1247	.read = context_switches_perf_counter_read,
Ingo Molnar	5d6a27d	2008-12-14 12:28:33 +0100	[diff] [blame]	1248	};
				1249
Ingo Molnar	6c594c2	2008-12-14 12:34:15 +0100	[diff] [blame]	1250	static inline u64 get_cpu_migrations(void)
				1251	{
				1252	return current->se.nr_migrations;
				1253	}
				1254
				1255	static void cpu_migrations_perf_counter_update(struct perf_counter *counter)
				1256	{
				1257	u64 prev, now;
				1258	s64 delta;
				1259
				1260	prev = atomic64_read(&counter->hw.prev_count);
				1261	now = get_cpu_migrations();
				1262
				1263	atomic64_set(&counter->hw.prev_count, now);
				1264
				1265	delta = now - prev;
Ingo Molnar	6c594c2	2008-12-14 12:34:15 +0100	[diff] [blame]	1266
				1267	atomic64_add(delta, &counter->count);
				1268	}
				1269
				1270	static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
				1271	{
				1272	cpu_migrations_perf_counter_update(counter);
				1273	}
				1274
/*
 * Enable a cpu-migrations counter.  Nothing to set up:
 * se.nr_migrations is a per-task value already, so it does not have to
 * be cleared on switch-in.  Always returns 0.
 */
static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
{
	return 0;
}
				1284
				1285	static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
				1286	{
				1287	cpu_migrations_perf_counter_update(counter);
				1288	}
				1289
				1290	static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
Ingo Molnar	7671581	2008-12-17 14:20:28 +0100	[diff] [blame]	1291	.enable = cpu_migrations_perf_counter_enable,
				1292	.disable = cpu_migrations_perf_counter_disable,
				1293	.read = cpu_migrations_perf_counter_read,
Ingo Molnar	6c594c2	2008-12-14 12:34:15 +0100	[diff] [blame]	1294	};
				1295
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1296	static const struct hw_perf_counter_ops *
				1297	sw_perf_counter_init(struct perf_counter *counter)
				1298	{
				1299	const struct hw_perf_counter_ops *hw_ops = NULL;
				1300
				1301	switch (counter->hw_event.type) {
				1302	case PERF_COUNT_CPU_CLOCK:
				1303	hw_ops = &perf_ops_cpu_clock;
				1304	break;
Ingo Molnar	bae43c9	2008-12-11 14:03:20 +0100	[diff] [blame]	1305	case PERF_COUNT_TASK_CLOCK:
				1306	hw_ops = &perf_ops_task_clock;
				1307	break;
Ingo Molnar	e06c61a	2008-12-14 14:44:31 +0100	[diff] [blame]	1308	case PERF_COUNT_PAGE_FAULTS:
				1309	hw_ops = &perf_ops_page_faults;
				1310	break;
Ingo Molnar	5d6a27d	2008-12-14 12:28:33 +0100	[diff] [blame]	1311	case PERF_COUNT_CONTEXT_SWITCHES:
				1312	hw_ops = &perf_ops_context_switches;
				1313	break;
Ingo Molnar	6c594c2	2008-12-14 12:34:15 +0100	[diff] [blame]	1314	case PERF_COUNT_CPU_MIGRATIONS:
				1315	hw_ops = &perf_ops_cpu_migrations;
				1316	break;
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1317	default:
				1318	break;
				1319	}
				1320	return hw_ops;
				1321	}
				1322
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1323	/*
				1324	* Allocate and initialize a counter structure
				1325	*/
				1326	static struct perf_counter *
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1327	perf_counter_alloc(struct perf_counter_hw_event *hw_event,
				1328	int cpu,
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1329	struct perf_counter *group_leader,
				1330	gfp_t gfpflags)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1331	{
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1332	const struct hw_perf_counter_ops *hw_ops;
Ingo Molnar	621a01e	2008-12-11 12:46:46 +0100	[diff] [blame]	1333	struct perf_counter *counter;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1334
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1335	counter = kzalloc(sizeof(*counter), gfpflags);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1336	if (!counter)
				1337	return NULL;
				1338
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1339	/*
				1340	* Single counters are their own group leaders, with an
				1341	* empty sibling list:
				1342	*/
				1343	if (!group_leader)
				1344	group_leader = counter;
				1345
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1346	mutex_init(&counter->mutex);
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1347	INIT_LIST_HEAD(&counter->list_entry);
				1348	INIT_LIST_HEAD(&counter->sibling_list);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1349	init_waitqueue_head(&counter->waitq);
				1350
Ingo Molnar	9f66a38	2008-12-10 12:33:23 +0100	[diff] [blame]	1351	counter->irqdata = &counter->data[0];
				1352	counter->usrdata = &counter->data[1];
				1353	counter->cpu = cpu;
				1354	counter->hw_event = *hw_event;
				1355	counter->wakeup_pending = 0;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1356	counter->group_leader = group_leader;
Ingo Molnar	621a01e	2008-12-11 12:46:46 +0100	[diff] [blame]	1357	counter->hw_ops = NULL;
				1358
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	1359	counter->state = PERF_COUNTER_STATE_INACTIVE;
Ingo Molnar	a86ed50	2008-12-17 00:43:10 +0100	[diff] [blame]	1360	if (hw_event->disabled)
				1361	counter->state = PERF_COUNTER_STATE_OFF;
				1362
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1363	hw_ops = NULL;
				1364	if (!hw_event->raw && hw_event->type < 0)
				1365	hw_ops = sw_perf_counter_init(counter);
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1366	if (!hw_ops)
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1367	hw_ops = hw_perf_counter_init(counter);
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1368
Ingo Molnar	621a01e	2008-12-11 12:46:46 +0100	[diff] [blame]	1369	if (!hw_ops) {
				1370	kfree(counter);
				1371	return NULL;
				1372	}
				1373	counter->hw_ops = hw_ops;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1374
				1375	return counter;
				1376	}
				1377
				1378	/**
Ingo Molnar	9f66a38	2008-12-10 12:33:23 +0100	[diff] [blame]	1379	* sys_perf_counter_open - open a performance counter, associate it to a task/cpu
				1380	*
				1381	* @hw_event_uptr: event type attributes for monitoring/sampling
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1382	* @pid: target pid
Ingo Molnar	9f66a38	2008-12-10 12:33:23 +0100	[diff] [blame]	1383	* @cpu: target cpu
				1384	* @group_fd: group leader counter fd
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1385	*/
Ingo Molnar	1d1c7dd	2008-12-11 14:59:31 +0100	[diff] [blame]	1386	asmlinkage int
				1387	sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
				1388	pid_t pid, int cpu, int group_fd)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1389	{
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1390	struct perf_counter counter, group_leader;
Ingo Molnar	9f66a38	2008-12-10 12:33:23 +0100	[diff] [blame]	1391	struct perf_counter_hw_event hw_event;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1392	struct perf_counter_context *ctx;
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1393	struct file *counter_file = NULL;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1394	struct file *group_file = NULL;
				1395	int fput_needed = 0;
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1396	int fput_needed2 = 0;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1397	int ret;
				1398
Ingo Molnar	9f66a38	2008-12-10 12:33:23 +0100	[diff] [blame]	1399	if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
Thomas Gleixner	eab656a	2008-12-08 19:26:59 +0100	[diff] [blame]	1400	return -EFAULT;
				1401
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1402	/*
Ingo Molnar	ccff286	2008-12-11 11:26:29 +0100	[diff] [blame]	1403	* Get the target context (task or percpu):
				1404	*/
				1405	ctx = find_get_context(pid, cpu);
				1406	if (IS_ERR(ctx))
				1407	return PTR_ERR(ctx);
				1408
				1409	/*
				1410	* Look up the group leader (we will attach this counter to it):
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1411	*/
				1412	group_leader = NULL;
				1413	if (group_fd != -1) {
				1414	ret = -EINVAL;
				1415	group_file = fget_light(group_fd, &fput_needed);
				1416	if (!group_file)
Ingo Molnar	ccff286	2008-12-11 11:26:29 +0100	[diff] [blame]	1417	goto err_put_context;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1418	if (group_file->f_op != &perf_fops)
Ingo Molnar	ccff286	2008-12-11 11:26:29 +0100	[diff] [blame]	1419	goto err_put_context;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1420
				1421	group_leader = group_file->private_data;
				1422	/*
Ingo Molnar	ccff286	2008-12-11 11:26:29 +0100	[diff] [blame]	1423	* Do not allow a recursive hierarchy (this new sibling
				1424	* becoming part of another group-sibling):
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1425	*/
Ingo Molnar	ccff286	2008-12-11 11:26:29 +0100	[diff] [blame]	1426	if (group_leader->group_leader != group_leader)
				1427	goto err_put_context;
				1428	/*
				1429	* Do not allow to attach to a group in a different
				1430	* task or CPU context:
				1431	*/
				1432	if (group_leader->ctx != ctx)
				1433	goto err_put_context;
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	1434	/*
				1435	* Only a group leader can be exclusive or pinned
				1436	*/
				1437	if (hw_event.exclusive \|\| hw_event.pinned)
				1438	goto err_put_context;
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1439	}
				1440
Ingo Molnar	5c92d12	2008-12-11 13:21:10 +0100	[diff] [blame]	1441	ret = -EINVAL;
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1442	counter = perf_counter_alloc(&hw_event, cpu, group_leader, GFP_KERNEL);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1443	if (!counter)
				1444	goto err_put_context;
				1445
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1446	ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
				1447	if (ret < 0)
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1448	goto err_free_put_context;
				1449
				1450	counter_file = fget_light(ret, &fput_needed2);
				1451	if (!counter_file)
				1452	goto err_free_put_context;
				1453
				1454	counter->filp = counter_file;
				1455	perf_install_in_context(ctx, counter, cpu);
				1456
				1457	fput_light(counter_file, fput_needed2);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1458
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1459	out_fput:
				1460	fput_light(group_file, fput_needed);
				1461
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1462	return ret;
				1463
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1464	err_free_put_context:
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1465	kfree(counter);
				1466
				1467	err_put_context:
				1468	put_context(ctx);
				1469
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1470	goto out_fput;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1471	}
				1472
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1473	/*
				1474	* Initialize the perf_counter context in a task_struct:
				1475	*/
				1476	static void
				1477	__perf_counter_init_context(struct perf_counter_context *ctx,
				1478	struct task_struct *task)
				1479	{
				1480	memset(ctx, 0, sizeof(*ctx));
				1481	spin_lock_init(&ctx->lock);
				1482	INIT_LIST_HEAD(&ctx->counter_list);
				1483	ctx->task = task;
				1484	}
				1485
				1486	/*
				1487	* inherit a counter from parent task to child task:
				1488	*/
				1489	static int
				1490	inherit_counter(struct perf_counter *parent_counter,
				1491	struct task_struct *parent,
				1492	struct perf_counter_context *parent_ctx,
				1493	struct task_struct *child,
				1494	struct perf_counter_context *child_ctx)
				1495	{
				1496	struct perf_counter *child_counter;
				1497
				1498	child_counter = perf_counter_alloc(&parent_counter->hw_event,
				1499	parent_counter->cpu, NULL,
				1500	GFP_ATOMIC);
				1501	if (!child_counter)
				1502	return -ENOMEM;
				1503
				1504	/*
				1505	* Link it up in the child's context:
				1506	*/
				1507	child_counter->ctx = child_ctx;
				1508	child_counter->task = child;
				1509	list_add_counter(child_counter, child_ctx);
				1510	child_ctx->nr_counters++;
				1511
				1512	child_counter->parent = parent_counter;
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1513	/*
				1514	* inherit into child's child as well:
				1515	*/
				1516	child_counter->hw_event.inherit = 1;
				1517
				1518	/*
				1519	* Get a reference to the parent filp - we will fput it
				1520	* when the child counter exits. This is safe to do because
				1521	* we are in the parent and we know that the filp still
				1522	* exists and has a nonzero count:
				1523	*/
				1524	atomic_long_inc(&parent_counter->filp->f_count);
				1525
				1526	return 0;
				1527	}
				1528
				1529	static void
				1530	__perf_counter_exit_task(struct task_struct *child,
				1531	struct perf_counter *child_counter,
				1532	struct perf_counter_context *child_ctx)
				1533	{
				1534	struct perf_counter *parent_counter;
				1535	u64 parent_val, child_val;
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1536
				1537	/*
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	1538	* If we do not self-reap then we have to wait for the
				1539	* child task to unschedule (it will happen for sure),
				1540	* so that its counter is at its final count. (This
				1541	* condition triggers rarely - child tasks usually get
				1542	* off their CPU before the parent has a chance to
				1543	* get this far into the reaping action)
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1544	*/
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	1545	if (child != current) {
				1546	wait_task_inactive(child, 0);
				1547	list_del_init(&child_counter->list_entry);
				1548	} else {
Ingo Molnar	0cc0c02	2008-12-14 23:20:36 +0100	[diff] [blame]	1549	struct perf_cpu_context *cpuctx;
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	1550	unsigned long flags;
				1551	u64 perf_flags;
				1552
				1553	/*
				1554	* Disable and unlink this counter.
				1555	*
				1556	* Be careful about zapping the list - IRQ/NMI context
				1557	* could still be processing it:
				1558	*/
				1559	curr_rq_lock_irq_save(&flags);
				1560	perf_flags = hw_perf_save_disable();
Ingo Molnar	0cc0c02	2008-12-14 23:20:36 +0100	[diff] [blame]	1561
				1562	cpuctx = &__get_cpu_var(perf_cpu_context);
				1563
Paul Mackerras	3b6f9e5	2009-01-14 21:00:30 +1100	[diff] [blame^]	1564	counter_sched_out(child_counter, cpuctx, child_ctx);
Ingo Molnar	0cc0c02	2008-12-14 23:20:36 +0100	[diff] [blame]	1565
Ingo Molnar	235c7fc	2008-12-21 14:43:25 +0100	[diff] [blame]	1566	list_del_init(&child_counter->list_entry);
				1567
				1568	child_ctx->nr_counters--;
				1569
				1570	hw_perf_restore(perf_flags);
				1571	curr_rq_unlock_irq_restore(&flags);
Ingo Molnar	0cc0c02	2008-12-14 23:20:36 +0100	[diff] [blame]	1572	}
				1573
Ingo Molnar	9b51f66	2008-12-12 13:49:45 +0100	[diff] [blame]	1574	parent_counter = child_counter->parent;
				1575	/*
				1576	* It can happen that parent exits first, and has counters
				1577	* that are still around due to the child reference. These
				1578	* counters need to be zapped - but otherwise linger.
				1579	*/
				1580	if (!parent_counter)
				1581	return;
				1582
				1583	parent_val = atomic64_read(&parent_counter->count);
				1584	child_val = atomic64_read(&child_counter->count);
				1585
				1586	/*
				1587	* Add back the child's count to the parent's count:
				1588	*/
				1589	atomic64_add(child_val, &parent_counter->count);
				1590
				1591	fput(parent_counter->filp);
				1592
				1593	kfree(child_counter);
				1594	}
				1595
				1596	/*
				1597	* When a child task exist, feed back counter values to parent counters.
				1598	*
				1599	* Note: we are running in child context, but the PID is not hashed
				1600	* anymore so new counters will not be added.
				1601	*/
				1602	void perf_counter_exit_task(struct task_struct *child)
				1603	{
				1604	struct perf_counter child_counter, tmp;
				1605	struct perf_counter_context *child_ctx;
				1606
				1607	child_ctx = &child->perf_counter_ctx;
				1608
				1609	if (likely(!child_ctx->nr_counters))
				1610	return;
				1611
				1612	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
				1613	list_entry)
				1614	__perf_counter_exit_task(child, child_counter, child_ctx);
				1615	}
				1616
				1617	/*
				1618	* Initialize the perf_counter context in task_struct
				1619	*/
				1620	void perf_counter_init_task(struct task_struct *child)
				1621	{
				1622	struct perf_counter_context child_ctx, parent_ctx;
				1623	struct perf_counter counter, parent_counter;
				1624	struct task_struct *parent = current;
				1625	unsigned long flags;
				1626
				1627	child_ctx = &child->perf_counter_ctx;
				1628	parent_ctx = &parent->perf_counter_ctx;
				1629
				1630	__perf_counter_init_context(child_ctx, child);
				1631
				1632	/*
				1633	* This is executed from the parent task context, so inherit
				1634	* counters that have been marked for cloning:
				1635	*/
				1636
				1637	if (likely(!parent_ctx->nr_counters))
				1638	return;
				1639
				1640	/*
				1641	* Lock the parent list. No need to lock the child - not PID
				1642	* hashed yet and not running, so nobody can access it.
				1643	*/
				1644	spin_lock_irqsave(&parent_ctx->lock, flags);
				1645
				1646	/*
				1647	* We dont have to disable NMIs - we are only looking at
				1648	* the list, not manipulating it:
				1649	*/
				1650	list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) {
				1651	if (!counter->hw_event.inherit \|\| counter->group_leader != counter)
				1652	continue;
				1653
				1654	/*
				1655	* Instead of creating recursive hierarchies of counters,
				1656	* we link inheritd counters back to the original parent,
				1657	* which has a filp for sure, which we use as the reference
				1658	* count:
				1659	*/
				1660	parent_counter = counter;
				1661	if (counter->parent)
				1662	parent_counter = counter->parent;
				1663
				1664	if (inherit_counter(parent_counter, parent,
				1665	parent_ctx, child, child_ctx))
				1666	break;
				1667	}
				1668
				1669	spin_unlock_irqrestore(&parent_ctx->lock, flags);
				1670	}
				1671
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1672	static void __cpuinit perf_counter_init_cpu(int cpu)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1673	{
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1674	struct perf_cpu_context *cpuctx;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1675
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1676	cpuctx = &per_cpu(perf_cpu_context, cpu);
				1677	__perf_counter_init_context(&cpuctx->ctx, NULL);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1678
				1679	mutex_lock(&perf_resource_mutex);
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1680	cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu;
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1681	mutex_unlock(&perf_resource_mutex);
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1682
Paul Mackerras	01d0287	2009-01-14 13:44:19 +1100	[diff] [blame]	1683	hw_perf_counter_setup(cpu);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1684	}
				1685
				1686	#ifdef CONFIG_HOTPLUG_CPU
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1687	static void __perf_counter_exit_cpu(void *info)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1688	{
				1689	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
				1690	struct perf_counter_context *ctx = &cpuctx->ctx;
				1691	struct perf_counter counter, tmp;
				1692
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1693	list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry)
				1694	__perf_counter_remove_from_context(counter);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1695
				1696	}
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1697	static void perf_counter_exit_cpu(int cpu)
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1698	{
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1699	smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1700	}
				1701	#else
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1702	static inline void perf_counter_exit_cpu(int cpu) { }
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1703	#endif
				1704
				1705	static int __cpuinit
				1706	perf_cpu_notify(struct notifier_block self, unsigned long action, void hcpu)
				1707	{
				1708	unsigned int cpu = (long)hcpu;
				1709
				1710	switch (action) {
				1711
				1712	case CPU_UP_PREPARE:
				1713	case CPU_UP_PREPARE_FROZEN:
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1714	perf_counter_init_cpu(cpu);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1715	break;
				1716
				1717	case CPU_DOWN_PREPARE:
				1718	case CPU_DOWN_PREPARE_FROZEN:
Ingo Molnar	04289bb	2008-12-11 08:38:42 +0100	[diff] [blame]	1719	perf_counter_exit_cpu(cpu);
Thomas Gleixner	0793a61	2008-12-04 20:12:29 +0100	[diff] [blame]	1720	break;
				1721
				1722	default:
				1723	break;
				1724	}
				1725
				1726	return NOTIFY_OK;
				1727	}
				1728
/*
 * Notifier block that routes CPU notifier events to perf_cpu_notify():
 */
static struct notifier_block __cpuinitdata perf_cpu_nb = {
	.notifier_call = perf_cpu_notify,
};
				1732
				1733	static int __init perf_counter_init(void)
				1734	{
				1735	perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
				1736	(void *)(long)smp_processor_id());
				1737	register_cpu_notifier(&perf_cpu_nb);
				1738
				1739	return 0;
				1740	}
				1741	early_initcall(perf_counter_init);
				1742
				1743	static ssize_t perf_show_reserve_percpu(struct sysdev_class class, char buf)
				1744	{
				1745	return sprintf(buf, "%d\n", perf_reserved_percpu);
				1746	}
				1747
				1748	static ssize_t
				1749	perf_set_reserve_percpu(struct sysdev_class *class,
				1750	const char *buf,
				1751	size_t count)
				1752	{
				1753	struct perf_cpu_context *cpuctx;
				1754	unsigned long val;
				1755	int err, cpu, mpt;
				1756
				1757	err = strict_strtoul(buf, 10, &val);
				1758	if (err)
				1759	return err;
				1760	if (val > perf_max_counters)
				1761	return -EINVAL;
				1762
				1763	mutex_lock(&perf_resource_mutex);
				1764	perf_reserved_percpu = val;
				1765	for_each_online_cpu(cpu) {
				1766	cpuctx = &per_cpu(perf_cpu_context, cpu);
				1767	spin_lock_irq(&cpuctx->ctx.lock);
				1768	mpt = min(perf_max_counters - cpuctx->ctx.nr_counters,
				1769	perf_max_counters - perf_reserved_percpu);
				1770	cpuctx->max_pertask = mpt;
				1771	spin_unlock_irq(&cpuctx->ctx.lock);
				1772	}
				1773	mutex_unlock(&perf_resource_mutex);
				1774
				1775	return count;
				1776	}
				1777
				1778	static ssize_t perf_show_overcommit(struct sysdev_class class, char buf)
				1779	{
				1780	return sprintf(buf, "%d\n", perf_overcommit);
				1781	}
				1782
				1783	static ssize_t
				1784	perf_set_overcommit(struct sysdev_class class, const char buf, size_t count)
				1785	{
				1786	unsigned long val;
				1787	int err;
				1788
				1789	err = strict_strtoul(buf, 10, &val);
				1790	if (err)
				1791	return err;
				1792	if (val > 1)
				1793	return -EINVAL;
				1794
				1795	mutex_lock(&perf_resource_mutex);
				1796	perf_overcommit = val;
				1797	mutex_unlock(&perf_resource_mutex);
				1798
				1799	return count;
				1800	}
				1801
				1802	static SYSDEV_CLASS_ATTR(
				1803	reserve_percpu,
				1804	0644,
				1805	perf_show_reserve_percpu,
				1806	perf_set_reserve_percpu
				1807	);
				1808
				1809	static SYSDEV_CLASS_ATTR(
				1810	overcommit,
				1811	0644,
				1812	perf_show_overcommit,
				1813	perf_set_overcommit
				1814	);
				1815
				1816	static struct attribute *perfclass_attrs[] = {
				1817	&attr_reserve_percpu.attr,
				1818	&attr_overcommit.attr,
				1819	NULL
				1820	};
				1821
				1822	static struct attribute_group perfclass_attr_group = {
				1823	.attrs = perfclass_attrs,
				1824	.name = "perf_counters",
				1825	};
				1826
				1827	static int __init perf_counter_sysfs_init(void)
				1828	{
				1829	return sysfs_create_group(&cpu_sysdev_class.kset.kobj,
				1830	&perfclass_attr_group);
				1831	}
				1832	device_initcall(perf_counter_sysfs_init);