/*
 * Performance counter support for e500 family processors.
 *
 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
 * Copyright 2010 Freescale Semiconductor, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/string.h>
#include <linux/perf_event.h>
#include <asm/reg.h>
#include <asm/cputable.h>

/*
 * Map of generic hardware event types to hardware events
 * Zero if unsupported
 */
static int e500_generic_events[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = 1,
	[PERF_COUNT_HW_INSTRUCTIONS] = 2,
	[PERF_COUNT_HW_CACHE_MISSES] = 41, /* Data L1 cache reloads */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 12,
	[PERF_COUNT_HW_BRANCH_MISSES] = 15,
};
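
/*
 * The numbers above are raw e500 performance monitor event numbers
 * (e.g. event 1 counts processor cycles and event 2 counts completed
 * instructions); see the e500 core reference manual event list for
 * the remaining codes.
 */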

#define C(x)	PERF_COUNT_HW_CACHE_##x

/*
 * Table of generalized cache-related events.
 * 0 means not supported, -1 means nonsensical, other values
 * are event codes.
 */
static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	/*
	 * D-cache misses are not split into read/write/prefetch;
	 * use raw event 41.
	 */
	[C(L1D)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	27,		0	},
		[C(OP_WRITE)] = {	28,		0	},
		[C(OP_PREFETCH)] = {	29,		0	},
	},
	[C(L1I)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	2,		60	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	0,		0	},
	},
	/*
	 * Assuming LL means L2, it's not a good match for this model.
	 * It allocates only on L1 castout or explicit prefetch, and
	 * does not have separate read/write events (but it does have
	 * separate instruction/data events).
	 */
	[C(LL)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0	},
		[C(OP_WRITE)] = {	0,		0	},
		[C(OP_PREFETCH)] = {	0,		0	},
	},
	/*
	 * There are data/instruction MMU misses, but that's a miss on
	 * the chip's internal level-one TLB which is probably not
	 * what the user wants.  Instead, unified level-two TLB misses
	 * are reported here.
	 */
	[C(DTLB)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	26,		66	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
	[C(BPU)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	12,		15	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
};
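
/*
 * Usage sketch (assumption, not part of the original file): any event
 * number from the core's list can also be requested directly as a raw
 * event; the D-cache reload count mentioned above is event 41 (0x29),
 * i.e. something like "perf stat -e r29 <cmd>" from userspace.
 */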

static int num_events = 128;

/* Upper half of event id is PMLCb, for threshold events */
static u64 e500_xlate_event(u64 event_id)
{
	u32 event_low = (u32)event_id;
	u64 ret;

	if (event_low >= num_events)
		return 0;

	ret = FSL_EMB_EVENT_VALID;

	if (event_low >= 76 && event_low <= 81) {
		ret |= FSL_EMB_EVENT_RESTRICTED;
		ret |= event_id &
		       (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH);
	} else if (event_id &
	           (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH)) {
		/* Threshold requested on non-threshold event */
		return 0;
	}

	return ret;
}
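
/*
 * Encoding assumed above: the low 32 bits of the raw config select the
 * event number; for the threshold-capable events (76-81) the caller may
 * also pass PMLCb threshold/threshold-multiplier bits in the upper half,
 * which are forwarded to the core fsl_emb code through the
 * FSL_EMB_EVENT_THRESH and FSL_EMB_EVENT_THRESHMUL masks.  Threshold
 * bits on any other event make the id invalid.
 */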

static struct fsl_emb_pmu e500_pmu = {
	.name			= "e500 family",
	.n_counter		= 4,
	.n_restricted		= 2,
	.xlate_event		= e500_xlate_event,
	.n_generic		= ARRAY_SIZE(e500_generic_events),
	.generic_events		= e500_generic_events,
	.cache_events		= &e500_cache_events,
};
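
/*
 * The e500 performance monitor provides four counters; n_restricted = 2
 * presumably tells the core fsl_emb code that events flagged
 * FSL_EMB_EVENT_RESTRICTED above may only be scheduled on the two
 * counters that support them.
 */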

static int init_e500_pmu(void)
{
	if (!cur_cpu_spec->oprofile_cpu_type)
		return -ENODEV;

	if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500mc"))
		num_events = 256;
	else if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500"))
		return -ENODEV;

	return register_fsl_emb_pmu(&e500_pmu);
}

arch_initcall(init_e500_pmu);