x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 25423a5..edcde52 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -368,6 +368,10 @@
if (c->x86 >= 6)
set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
+ /* Enable Performance counter for K7 and later */
+ if (c->x86 > 6 && c->x86 <= 0x11)
+ set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
+
if (!c->x86_model_id[0]) {
switch (c->x86) {
case 0xf:
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a3c8852..266618a 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -74,6 +74,24 @@
}
/*
+ * AMD Performance Monitor K7 and later.
+ */
+static const int amd_perfmon_event_map[] =
+{
+ [PERF_COUNT_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_CACHE_REFERENCES] = 0x0080,
+ [PERF_COUNT_CACHE_MISSES] = 0x0081,
+ [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4,
+ [PERF_COUNT_BRANCH_MISSES] = 0x00c5,
+};
+
+static int pmc_amd_event_map(int event)
+{
+ return amd_perfmon_event_map[event];
+}
+
+/*
* Propagate counter elapsed time into the generic counter.
* Can only be executed on the CPU where the counter is active.
* Returns the delta events processed.
@@ -151,8 +169,9 @@
* so we install an artificial 1<<31 period regardless of
* the generic counter period:
*/
- if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
- hwc->irq_period = 0x7FFFFFFF;
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
+ hwc->irq_period = 0x7FFFFFFF;
atomic64_set(&hwc->period_left, hwc->irq_period);
@@ -184,6 +203,22 @@
return ctrl;
}
+static u64 pmc_amd_save_disable_all(void)
+{
+ int idx;
+ u64 val, ctrl = 0;
+
+ for (idx = 0; idx < nr_counters_generic; idx++) {
+ rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
+ if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
+ ctrl |= (1 << idx);
+ val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
+ }
+
+ return ctrl;
+}
+
u64 hw_perf_save_disable(void)
{
if (unlikely(!perf_counters_initialized))
@@ -198,6 +233,20 @@
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
}
+static void pmc_amd_restore_all(u64 ctrl)
+{
+ u64 val;
+ int idx;
+
+ for (idx = 0; idx < nr_counters_generic; idx++) {
+ if (ctrl & (1 << idx)) {
+ rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
+ val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
+ }
+ }
+}
+
void hw_perf_restore(u64 ctrl)
{
if (unlikely(!perf_counters_initialized))
@@ -314,6 +363,9 @@
{
unsigned int event;
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ return -1;
+
if (unlikely(hwc->nmi))
return -1;
@@ -401,6 +453,7 @@
cpu = smp_processor_id();
cpuc = &per_cpu(cpu_hw_counters, cpu);
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
@@ -411,6 +464,7 @@
printk(KERN_INFO "CPU#%d: status: %016llx\n", cpu, status);
printk(KERN_INFO "CPU#%d: overflow: %016llx\n", cpu, overflow);
printk(KERN_INFO "CPU#%d: fixed: %016llx\n", cpu, fixed);
+ }
printk(KERN_INFO "CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used);
for (idx = 0; idx < nr_counters_generic; idx++) {
@@ -588,6 +642,9 @@
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
return;
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ return;
+
if (unlikely(!perf_counters_initialized))
return;
@@ -692,6 +749,15 @@
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
};
+static struct pmc_x86_ops pmc_amd_ops = {
+ .save_disable_all = pmc_amd_save_disable_all,
+ .restore_all = pmc_amd_restore_all,
+ .eventsel = MSR_K7_EVNTSEL0,
+ .perfctr = MSR_K7_PERFCTR0,
+ .event_map = pmc_amd_event_map,
+ .max_events = ARRAY_SIZE(amd_perfmon_event_map),
+};
+
static struct pmc_x86_ops *pmc_intel_init(void)
{
union cpuid10_eax eax;
@@ -719,6 +785,16 @@
return &pmc_intel_ops;
}
+static struct pmc_x86_ops *pmc_amd_init(void)
+{
+ nr_counters_generic = 4;
+ nr_counters_fixed = 0;
+
+ printk(KERN_INFO "AMD Performance Monitoring support detected.\n");
+
+ return &pmc_amd_ops;
+}
+
void __init init_hw_perf_counters(void)
{
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
@@ -728,6 +804,9 @@
case X86_VENDOR_INTEL:
pmc_ops = pmc_intel_init();
break;
+ case X86_VENDOR_AMD:
+ pmc_ops = pmc_amd_init();
+ break;
}
if (!pmc_ops)
return;