ARM: Perf: Add PMU support for Krait Pass 2
Add perf support for Krait Pass 2, which uses different event
register base addresses and adds counting of VeNUM events. The
cache maps lose their const qualifier so that the Krait TLB
entries can be patched at init time with the pass-specific
event codes.
Change-Id: I39e6a52314f2463dfea0765c68825dd0d84dd04f
Signed-off-by: Ashwin Chaugule <ashwinc@codeaurora.org>
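
For reference, a worked decode of the prccg encoding introduced below.
This is illustrative only and not part of the patch; it reuses the names
KRAIT_EVT_PREFIX, KRAIT_VENUMEVT_PREFIX, VENUM_BASE_OFFSET and
evt_type_base that the patch adds:

    /*
     * Decode sketch mirroring krait_decode_event().
     * Example: KRAIT_P2_L1_DTLB_ACCESS = 0x12210
     *   prefix = 1, reg = 2, code = 0x21, group = 0
     *   group_setval   = 0x80000000 | (0x21 << (0 * 8)) = 0x80000021
     *   armv7_evt_type = evt_type_base[1][2] | 0        = 0xd4 (Pass 2)
     * VeNUM events (prefix 2) additionally get VENUM_BASE_OFFSET added
     * to reg so they select the VMRESR0 entries in evt_type_base[][]
     * and krait_functions[].
     */
    static int krait_decode_sketch(u32 evt, u32 pass_idx, u32 max_l1_reg)
    {
            u8 prefix = (evt & 0xF0000) >> 16;
            u8 reg    = (evt & 0x0F000) >> 12;
            u8 code   = (evt & 0x00FF0) >> 4;
            u8 group  =  evt & 0x0000F;

            if (group > 3 || reg > max_l1_reg)
                    return -EINVAL;
            if (prefix != KRAIT_EVT_PREFIX && prefix != KRAIT_VENUMEVT_PREFIX)
                    return -EINVAL;
            if (prefix == KRAIT_VENUMEVT_PREFIX) {
                    if (code & 0xe0)        /* VeNUM event codes are < 0x20 */
                            return -EINVAL;
                    reg += VENUM_BASE_OFFSET;
            }

            return evt_type_base[pass_idx][reg] | group;
    }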
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index c8f963c..be324ac 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -85,7 +85,7 @@
void (*start)(void);
void (*stop)(void);
void (*reset)(void *);
- const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
+ unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
const unsigned (*event_map)[PERF_COUNT_HW_MAX];
diff --git a/arch/arm/kernel/perf_event_msm.c b/arch/arm/kernel/perf_event_msm.c
index 579cd3b..1c2206f 100644
--- a/arch/arm/kernel/perf_event_msm.c
+++ b/arch/arm/kernel/perf_event_msm.c
@@ -128,7 +128,7 @@
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
};
-static const unsigned armv7_scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+static unsigned armv7_scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
diff --git a/arch/arm/kernel/perf_event_msm_krait.c b/arch/arm/kernel/perf_event_msm_krait.c
index 1e77489..e3e9bf2 100644
--- a/arch/arm/kernel/perf_event_msm_krait.c
+++ b/arch/arm/kernel/perf_event_msm_krait.c
@@ -15,20 +15,41 @@
#ifdef CONFIG_CPU_V7
#define KRAIT_EVT_PREFIX 1
-#define KRAIT_MAX_L1_REG 2
+#define KRAIT_VENUMEVT_PREFIX 2
/*
event encoding: prccg
- p = prefix (1 for Krait L1)
+ p = prefix (1 for Krait L1, 2 for Krait VeNUM events)
r = register
cc = code
g = group
*/
-#define KRAIT_L1_ICACHE_MISS 0x10010
-#define KRAIT_L1_ICACHE_ACCESS 0x10011
-#define KRAIT_DTLB_ACCESS 0x121B2
-#define KRAIT_ITLB_ACCESS 0x121C0
-u32 evt_type_base[] = {0x4c, 0x50, 0x54};
+#define KRAIT_L1_ICACHE_ACCESS 0x10011
+#define KRAIT_L1_ICACHE_MISS 0x10010
+
+#define KRAIT_P1_L1_ITLB_ACCESS 0x121b2
+#define KRAIT_P1_L1_DTLB_ACCESS 0x121c0
+
+#define KRAIT_P2_L1_ITLB_ACCESS 0x12222
+#define KRAIT_P2_L1_DTLB_ACCESS 0x12210
+
+u32 evt_type_base[][4] = {
+ {0x4c, 0x50, 0x54}, /* Pass 1 */
+ {0xcc, 0xd0, 0xd4, 0xd8}, /* Pass 2 */
+};
+
+#define KRAIT_MIDR_PASS1 0x510F04D0
+#define KRAIT_MIDR_MASK 0xfffffff0
+
+/*
+ * This offset is used to calculate the index
+ * into evt_type_base[][] and krait_functions[]
+ */
+#define VENUM_BASE_OFFSET 3
+
+/* Krait Pass 1 has 3 event register groups, Pass 2 has 4 */
+static u32 krait_ver, evt_index;
+static u32 krait_max_l1_reg;
static const unsigned armv7_krait_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
@@ -40,7 +61,7 @@
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
};
-static const unsigned armv7_krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+static unsigned armv7_krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
@@ -93,11 +114,11 @@
},
[C(DTLB)] = {
[C(OP_READ)] = {
- [C(RESULT_ACCESS)] = KRAIT_DTLB_ACCESS,
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = KRAIT_DTLB_ACCESS,
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
@@ -107,11 +128,11 @@
},
[C(ITLB)] = {
[C(OP_READ)] = {
- [C(RESULT_ACCESS)] = KRAIT_ITLB_ACCESS,
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = KRAIT_ITLB_ACCESS,
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
@@ -170,17 +191,26 @@
u8 group;
prefix = (krait_evt_type & 0xF0000) >> 16;
- reg = (krait_evt_type & 0x0F000) >> 12;
- code = (krait_evt_type & 0x00FF0) >> 4;
- group = krait_evt_type & 0x0000F;
+ reg = (krait_evt_type & 0x0F000) >> 12;
+ code = (krait_evt_type & 0x00FF0) >> 4;
+ group = krait_evt_type & 0x0000F;
- if ((prefix != KRAIT_EVT_PREFIX) || (group > 3) ||
- (reg > KRAIT_MAX_L1_REG))
+ if ((group > 3) || (reg > krait_max_l1_reg))
return -EINVAL;
+ if (prefix != KRAIT_EVT_PREFIX && prefix != KRAIT_VENUMEVT_PREFIX)
+ return -EINVAL;
+
+ if (prefix == KRAIT_VENUMEVT_PREFIX) {
+ if ((code & 0xe0) || krait_ver != 2)
+ return -EINVAL;
+ else
+ reg += VENUM_BASE_OFFSET;
+ }
+
evtinfo->group_setval = 0x80000000 | (code << (group * 8));
evtinfo->groupcode = reg;
- evtinfo->armv7_evt_type = evt_type_base[reg] | group;
+ evtinfo->armv7_evt_type = evt_type_base[evt_index][reg] | group;
return evtinfo->armv7_evt_type;
}
@@ -224,9 +254,59 @@
asm volatile("mcr p15, 1, %0, c9, c15, 2" : : "r" (val));
}
+static u32 krait_read_vmresr0(void)
+{
+ u32 val;
+
+ asm volatile ("mrc p10, 7, %0, c11, c0, 0" : "=r" (val));
+ return val;
+}
+
+static void krait_write_vmresr0(u32 val)
+{
+ asm volatile ("mcr p10, 7, %0, c11, c0, 0" : : "r" (val));
+}
+
+static DEFINE_PER_CPU(u32, venum_orig_val);
+static DEFINE_PER_CPU(u32, fp_orig_val);
+
+static void krait_pre_vmresr0(void)
+{
+ u32 venum_new_val;
+ u32 fp_new_val;
+ u32 v_orig_val;
+ u32 f_orig_val;
+
+ /* CPACR Enable CP10 access */
+ v_orig_val = get_copro_access();
+ venum_new_val = v_orig_val | CPACC_SVC(10);
+ set_copro_access(venum_new_val);
+ /* Store orig venum val */
+ __get_cpu_var(venum_orig_val) = v_orig_val;
+
+ /* Enable FPEXC */
+ f_orig_val = fmrx(FPEXC);
+ fp_new_val = f_orig_val | FPEXC_EN;
+ fmxr(FPEXC, fp_new_val);
+ /* Store orig fp val */
+ __get_cpu_var(fp_orig_val) = f_orig_val;
+
+}
+
+static void krait_post_vmresr0(void)
+{
+ /* Restore FPEXC */
+ fmxr(FPEXC, __get_cpu_var(fp_orig_val));
+ isb();
+ /* Restore CPACR */
+ set_copro_access(__get_cpu_var(venum_orig_val));
+}
+
struct krait_access_funcs {
u32 (*read) (void);
void (*write) (u32);
+ void (*pre) (void);
+ void (*post) (void);
};
/*
@@ -235,9 +315,11 @@
* Having the following array modularizes the code for doing that.
*/
struct krait_access_funcs krait_functions[] = {
- {krait_read_pmresr0, krait_write_pmresr0},
- {krait_read_pmresr1, krait_write_pmresr1},
- {krait_read_pmresr2, krait_write_pmresr2},
+ {krait_read_pmresr0, krait_write_pmresr0, NULL, NULL},
+ {krait_read_pmresr1, krait_write_pmresr1, NULL, NULL},
+ {krait_read_pmresr2, krait_write_pmresr2, NULL, NULL},
+ {krait_read_vmresr0, krait_write_vmresr0, krait_pre_vmresr0,
+ krait_post_vmresr0},
};
static inline u32 krait_get_columnmask(u32 evt_code)
@@ -252,9 +334,15 @@
{
u32 val;
+ if (krait_functions[gr].pre)
+ krait_functions[gr].pre();
+
val = krait_get_columnmask(evt_code) & krait_functions[gr].read();
val = val | setval;
krait_functions[gr].write(val);
+
+ if (krait_functions[gr].post)
+ krait_functions[gr].post();
}
static void krait_clear_pmuregs(void)
@@ -262,15 +350,25 @@
krait_write_pmresr0(0);
krait_write_pmresr1(0);
krait_write_pmresr2(0);
+
+ krait_pre_vmresr0();
+ krait_write_vmresr0(0);
+ krait_post_vmresr0();
}
static void krait_clearpmu(u32 grp, u32 val, u32 evt_code)
{
u32 new_pmuval;
+ if (krait_functions[grp].pre)
+ krait_functions[grp].pre();
+
new_pmuval = krait_functions[grp].read() &
krait_get_columnmask(evt_code);
krait_functions[grp].write(new_pmuval);
+
+ if (krait_functions[grp].post)
+ krait_functions[grp].post();
}
static void krait_pmu_disable_event(struct hw_perf_event *hwc, int idx)
@@ -380,6 +478,19 @@
.max_period = (1LLU << 32) - 1,
};
+int get_krait_ver(void)
+{
+ int ver = 0;
+ int midr = read_cpuid_id();
+
+ if ((midr & KRAIT_MIDR_MASK) != KRAIT_MIDR_PASS1)
+ ver = 2;
+
+ pr_debug("krait_ver: %d, midr: %x\n", ver, midr);
+
+ return ver;
+}
+
static const struct arm_pmu *__init armv7_krait_pmu_init(void)
{
krait_pmu.id = ARM_PERF_PMU_ID_KRAIT;
@@ -388,6 +499,41 @@
krait_pmu.event_map = &armv7_krait_perf_map;
krait_pmu.num_events = armv7_read_num_pmnc_events();
krait_clear_pmuregs();
+
+ krait_ver = get_krait_ver();
+
+ if (krait_ver > 0) {
+ evt_index = 1;
+ krait_max_l1_reg = 3;
+ armv7_krait_perf_cache_map[C(ITLB)]
+ [C(OP_READ)]
+ [C(RESULT_ACCESS)] = KRAIT_P2_L1_ITLB_ACCESS;
+ armv7_krait_perf_cache_map[C(ITLB)]
+ [C(OP_WRITE)]
+ [C(RESULT_ACCESS)] = KRAIT_P2_L1_ITLB_ACCESS;
+ armv7_krait_perf_cache_map[C(DTLB)]
+ [C(OP_READ)]
+ [C(RESULT_ACCESS)] = KRAIT_P2_L1_DTLB_ACCESS;
+ armv7_krait_perf_cache_map[C(DTLB)]
+ [C(OP_WRITE)]
+ [C(RESULT_ACCESS)] = KRAIT_P2_L1_DTLB_ACCESS;
+ } else {
+ evt_index = 0;
+ krait_max_l1_reg = 2;
+ armv7_krait_perf_cache_map[C(ITLB)]
+ [C(OP_READ)]
+ [C(RESULT_ACCESS)] = KRAIT_P1_L1_ITLB_ACCESS;
+ armv7_krait_perf_cache_map[C(ITLB)]
+ [C(OP_WRITE)]
+ [C(RESULT_ACCESS)] = KRAIT_P1_L1_ITLB_ACCESS;
+ armv7_krait_perf_cache_map[C(DTLB)]
+ [C(OP_READ)]
+ [C(RESULT_ACCESS)] = KRAIT_P1_L1_DTLB_ACCESS;
+ armv7_krait_perf_cache_map[C(DTLB)]
+ [C(OP_WRITE)]
+ [C(RESULT_ACCESS)] = KRAIT_P1_L1_DTLB_ACCESS;
+ }
+
return &krait_pmu;
}
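
The VMRESR0 register added above sits behind CP10, so it can only be
touched with coprocessor access granted in CPACR and the VFP/NEON unit
enabled via FPEXC; that is the state the krait_pre_vmresr0() and
krait_post_vmresr0() hooks save and restore. A simplified standalone
sketch of the same pattern, using plain globals instead of the per-CPU
storage in the patch and raw accessors instead of get_copro_access()
and fmrx():

    /* Grant privileged CP10 access and enable VFP/NEON around a VMRESR0
     * access, then restore the previous state. Simplified sketch: not
     * per-CPU and not preemption-safe. */
    static u32 saved_cpacr, saved_fpexc;

    static void venum_pre_sketch(void)
    {
            u32 val;

            /* CPACR (cp15, c1, c0, 2): grant privileged-only cp10 access */
            asm volatile("mrc p15, 0, %0, c1, c0, 2" : "=r" (val));
            saved_cpacr = val;
            asm volatile("mcr p15, 0, %0, c1, c0, 2" : : "r" (val | (1 << 20)));
            asm volatile("isb");

            /* FPEXC (cp10, cr8): set the enable bit (bit 30) */
            asm volatile("mrc p10, 7, %0, cr8, cr0, 0" : "=r" (val));
            saved_fpexc = val;
            asm volatile("mcr p10, 7, %0, cr8, cr0, 0" : : "r" (val | (1 << 30)));
    }

    static void venum_post_sketch(void)
    {
            /* Restore FPEXC first, then CPACR */
            asm volatile("mcr p10, 7, %0, cr8, cr0, 0" : : "r" (saved_fpexc));
            asm volatile("isb");
            asm volatile("mcr p15, 0, %0, c1, c0, 2" : : "r" (saved_cpacr));
    }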
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index f1e8dd9..0635b7e 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -74,7 +74,7 @@
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
};
-static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+static unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
@@ -213,7 +213,7 @@
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
};
-static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+static unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 462aefb..4031c7b 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -203,7 +203,7 @@
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
};
-static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+static unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
@@ -314,7 +314,7 @@
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
};
-static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+static unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
@@ -424,7 +424,7 @@
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
};
-static const unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+static unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
@@ -540,7 +540,7 @@
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
};
-static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+static unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 39affbe..21977cf 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -57,7 +57,7 @@
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
};
-static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+static unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
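
For completeness, the TLB entries patched into armv7_krait_perf_cache_map
at init are reached through the generic hardware cache event interface, so
no userspace change is needed. A minimal request for L1 ITLB read accesses,
which the kernel resolves to KRAIT_P1_L1_ITLB_ACCESS or
KRAIT_P2_L1_ITLB_ACCESS depending on the detected pass, is just standard
perf_event_open() usage (sketch, not part of the patch):

    /* Count L1 ITLB read accesses for the current task on any CPU. */
    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <string.h>
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            struct perf_event_attr attr;
            uint64_t count;
            int fd;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_HW_CACHE;
            attr.config = PERF_COUNT_HW_CACHE_ITLB |
                          (PERF_COUNT_HW_CACHE_OP_READ << 8) |
                          (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16);
            attr.disabled = 1;

            fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
            if (fd < 0) {
                    perror("perf_event_open");
                    return 1;
            }

            ioctl(fd, PERF_EVENT_IOC_RESET, 0);
            ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
            /* ... workload under test ... */
            ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

            if (read(fd, &count, sizeof(count)) == sizeof(count))
                    printf("ITLB accesses: %llu\n", (unsigned long long)count);

            close(fd);
            return 0;
    }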