/*
 * trace event based perf event profiling/tracing
 *
 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
 */

#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"

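/*
 * Per-cpu scratch buffers used to build the raw sample payload, one for
 * each perf recursion context so that nested events (e.g. an NMI hitting
 * in the middle of an IRQ handler) don't clobber each other's data.
 */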
static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];

/*
 * Force it to be aligned to unsigned long to avoid misaligned access
 * surprises
 */
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
	perf_trace_t;

/* Count the events in use (per event id, not per instance) */
static int	total_ref_count;

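/*
 * Check whether @p_event may sample raw tracepoint data (PERF_SAMPLE_RAW).
 * Counting-only events are always allowed.  Per-task events on tracepoints
 * flagged TRACE_EVENT_FL_CAP_ANY are also fine; anything else requires
 * CAP_SYS_ADMIN when the tracepoint paranoid setting is active, because
 * raw tracepoint payloads can leak kernel data.
 */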
static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
				 struct perf_event *p_event)
{
	/* No tracing, just counting, so no obvious leak */
	if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
		return 0;

	/* Some events are ok to be traced by non-root users... */
	if (p_event->attach_state == PERF_ATTACH_TASK) {
		if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
			return 0;
	}

	/*
	 * ...otherwise raw tracepoint data can be a severe data leak,
	 * only allow root to have these.
	 */
	if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	return 0;
}

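/*
 * Bind @p_event to @tp_event.  The first perf user of a given trace event
 * allocates the per-cpu hlist that the tracepoint handler walks and
 * registers the event class with perf (TRACE_REG_PERF_REGISTER); the very
 * first user of any trace event also allocates the shared per-context
 * scratch buffers.  Called with event_mutex held.
 */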
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
				 struct perf_event *p_event)
{
	struct hlist_head __percpu *list;
	int ret;
	int cpu;

	ret = perf_trace_event_perm(tp_event, p_event);
	if (ret)
		return ret;

	p_event->tp_event = tp_event;
	if (tp_event->perf_refcount++ > 0)
		return 0;

	ret = -ENOMEM;

	list = alloc_percpu(struct hlist_head);
	if (!list)
		goto fail;

	for_each_possible_cpu(cpu)
		INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));

	tp_event->perf_events = list;

	if (!total_ref_count) {
		char __percpu *buf;
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			buf = (char __percpu *)alloc_percpu(perf_trace_t);
			if (!buf)
				goto fail;

			perf_trace_buf[i] = buf;
		}
	}

	ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
	if (ret)
		goto fail;

	total_ref_count++;
	return 0;

fail:
	if (!total_ref_count) {
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}

	if (!--tp_event->perf_refcount) {
		free_percpu(tp_event->perf_events);
		tp_event->perf_events = NULL;
	}

	return ret;
}

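/*
 * Entry point from the perf core: resolve p_event->attr.config to a
 * registered ftrace event, pin its module and set up the binding.  The
 * module reference is dropped again if initialization fails.
 */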
int perf_trace_init(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event;
	int event_id = p_event->attr.config;
	int ret = -EINVAL;

	mutex_lock(&event_mutex);
	list_for_each_entry(tp_event, &ftrace_events, list) {
		if (tp_event->event.type == event_id &&
		    tp_event->class && tp_event->class->reg &&
		    try_module_get(tp_event->mod)) {
			ret = perf_trace_event_init(tp_event, p_event);
			if (ret)
				module_put(tp_event->mod);
			break;
		}
	}
	mutex_unlock(&event_mutex);

	return ret;
}

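/*
 * pmu ->add() callback: queue the event on this CPU's hlist so the
 * tracepoint handler feeds it samples.  Unless PERF_EF_START is set,
 * the event is added in the stopped state.
 */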
int perf_trace_add(struct perf_event *p_event, int flags)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	struct hlist_head __percpu *pcpu_list;
	struct hlist_head *list;

	pcpu_list = tp_event->perf_events;
	if (WARN_ON_ONCE(!pcpu_list))
		return -EINVAL;

	if (!(flags & PERF_EF_START))
		p_event->hw.state = PERF_HES_STOPPED;

	list = this_cpu_ptr(pcpu_list);
	hlist_add_head_rcu(&p_event->hlist_entry, list);

	return 0;
}

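/*
 * pmu ->del() callback: unlink the event from the per-cpu hlist.  RCU
 * protects handlers that may still be walking the list.
 */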
void perf_trace_del(struct perf_event *p_event, int flags)
{
	hlist_del_rcu(&p_event->hlist_entry);
}

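/*
 * Drop the reference taken in perf_trace_init().  The last perf user of a
 * trace event unregisters the perf hook, waits for in-flight handlers and
 * frees the per-cpu hlist; the last user of any trace event also frees
 * the shared scratch buffers.
 */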
void perf_trace_destroy(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	int i;

	mutex_lock(&event_mutex);
	if (--tp_event->perf_refcount > 0)
		goto out;

	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);

	/*
	 * Ensure our callback won't be called anymore. The buffers
	 * will be freed after that.
	 */
	tracepoint_synchronize_unregister();

	free_percpu(tp_event->perf_events);
	tp_event->perf_events = NULL;

	if (!--total_ref_count) {
		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}
out:
	module_put(tp_event->mod);
	mutex_unlock(&event_mutex);
}

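/*
 * Grab a recursion context and return the matching per-cpu raw buffer with
 * a generic trace_entry header (type, irq flags, preempt count) already
 * filled in.  On success *rctxp holds the recursion context the caller must
 * release when it submits the buffer; returns NULL if this context level is
 * already busy.
 *
 * A caller (e.g. a probe generated by the TRACE_EVENT() machinery) does
 * roughly:
 *
 *	entry = perf_trace_buf_prepare(size, event_type, regs, &rctx);
 *	if (!entry)
 *		return;
 *	... fill in the event-specific fields ...
 *	perf_trace_buf_submit(entry, size, rctx, ...);
 */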
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
				       struct pt_regs *regs, int *rctxp)
{
	struct trace_entry *entry;
	unsigned long flags;
	char *raw_data;
	int pc;

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));

	pc = preempt_count();

	*rctxp = perf_swevent_get_recursion_context();
	if (*rctxp < 0)
		return NULL;

	raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);

	/* zero the dead bytes from align to not leak stack to user */
	memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));

	entry = (struct trace_entry *)raw_data;
	local_save_flags(flags);
	tracing_generic_entry_update(entry, flags, pc);
	entry->type = type;

	return raw_data;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);