perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index deb84bb..9cb4924 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -402,6 +402,9 @@
{
s64 val, delta, prev;
+ if (event->hw.state & PERF_HES_STOPPED)
+ return;
+
if (!event->hw.idx)
return;
/*
@@ -517,7 +520,7 @@
* Disable all events to prevent PMU interrupts and to allow
* events to be added or removed.
*/
-static void power_pmu_pmu_disable(struct pmu *pmu)
+static void power_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuhw;
unsigned long flags;
@@ -565,7 +568,7 @@
* If we were previously disabled and events were added, then
* put the new config on the PMU.
*/
-static void power_pmu_pmu_enable(struct pmu *pmu)
+static void power_pmu_enable(struct pmu *pmu)
{
struct perf_event *event;
struct cpu_hw_events *cpuhw;
@@ -672,6 +675,8 @@
}
local64_set(&event->hw.prev_count, val);
event->hw.idx = idx;
+ if (event->hw.state & PERF_HES_STOPPED)
+ val = 0;
write_pmc(idx, val);
perf_event_update_userpage(event);
}
@@ -727,7 +732,7 @@
* re-enable the PMU in order to get hw_perf_enable to do the
* actual work of reconfiguring the PMU.
*/
-static int power_pmu_enable(struct perf_event *event)
+static int power_pmu_add(struct perf_event *event, int ef_flags)
{
struct cpu_hw_events *cpuhw;
unsigned long flags;
@@ -749,6 +754,9 @@
cpuhw->events[n0] = event->hw.config;
cpuhw->flags[n0] = event->hw.event_base;
+ if (!(ef_flags & PERF_EF_START))
+ event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
/*
* If group events scheduling transaction was started,
* skip the schedulability test here, it will be peformed
@@ -777,7 +785,7 @@
/*
* Remove a event from the PMU.
*/
-static void power_pmu_disable(struct perf_event *event)
+static void power_pmu_del(struct perf_event *event, int ef_flags)
{
struct cpu_hw_events *cpuhw;
long i;
@@ -826,27 +834,53 @@
}
/*
- * Re-enable interrupts on a event after they were throttled
- * because they were coming too fast.
+ * POWER-PMU does not support disabling individual counters, hence
+ * program their cycle counter to their max value and ignore the interrupts.
*/
-static void power_pmu_unthrottle(struct perf_event *event)
+
+static void power_pmu_start(struct perf_event *event, int ef_flags)
{
- s64 val, left;
+ unsigned long flags;
+ s64 left;
+
+ if (!event->hw.idx || !event->hw.sample_period)
+ return;
+
+ if (!(event->hw.state & PERF_HES_STOPPED))
+ return;
+
+ if (ef_flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+ local_irq_save(flags);
+ perf_pmu_disable(event->pmu);
+
+ event->hw.state = 0;
+ left = local64_read(&event->hw.period_left);
+ write_pmc(event->hw.idx, left);
+
+ perf_event_update_userpage(event);
+ perf_pmu_enable(event->pmu);
+ local_irq_restore(flags);
+}
+
+static void power_pmu_stop(struct perf_event *event, int ef_flags)
+{
unsigned long flags;
if (!event->hw.idx || !event->hw.sample_period)
return;
+
+ if (event->hw.state & PERF_HES_STOPPED)
+ return;
+
local_irq_save(flags);
perf_pmu_disable(event->pmu);
+
power_pmu_read(event);
- left = event->hw.sample_period;
- event->hw.last_period = left;
- val = 0;
- if (left < 0x80000000L)
- val = 0x80000000L - left;
- write_pmc(event->hw.idx, val);
- local64_set(&event->hw.prev_count, val);
- local64_set(&event->hw.period_left, left);
+ event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+ write_pmc(event->hw.idx, 0);
+
perf_event_update_userpage(event);
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
@@ -1131,13 +1165,14 @@
}
struct pmu power_pmu = {
- .pmu_enable = power_pmu_pmu_enable,
- .pmu_disable = power_pmu_pmu_disable,
+ .pmu_enable = power_pmu_enable,
+ .pmu_disable = power_pmu_disable,
.event_init = power_pmu_event_init,
- .enable = power_pmu_enable,
- .disable = power_pmu_disable,
+ .add = power_pmu_add,
+ .del = power_pmu_del,
+ .start = power_pmu_start,
+ .stop = power_pmu_stop,
.read = power_pmu_read,
- .unthrottle = power_pmu_unthrottle,
.start_txn = power_pmu_start_txn,
.cancel_txn = power_pmu_cancel_txn,
.commit_txn = power_pmu_commit_txn,
@@ -1155,6 +1190,11 @@
s64 prev, delta, left;
int record = 0;
+ if (event->hw.state & PERF_HES_STOPPED) {
+ write_pmc(event->hw.idx, 0);
+ return;
+ }
+
/* we don't have to worry about interrupts here */
prev = local64_read(&event->hw.prev_count);
delta = (val - prev) & 0xfffffffful;
@@ -1177,6 +1217,11 @@
val = 0x80000000LL - left;
}
+ write_pmc(event->hw.idx, val);
+ local64_set(&event->hw.prev_count, val);
+ local64_set(&event->hw.period_left, left);
+ perf_event_update_userpage(event);
+
/*
* Finally record data if requested.
*/
@@ -1189,23 +1234,9 @@
if (event->attr.sample_type & PERF_SAMPLE_ADDR)
perf_get_data_addr(regs, &data.addr);
- if (perf_event_overflow(event, nmi, &data, regs)) {
- /*
- * Interrupts are coming too fast - throttle them
- * by setting the event to 0, so it will be
- * at least 2^30 cycles until the next interrupt
- * (assuming each event counts at most 2 counts
- * per cycle).
- */
- val = 0;
- left = ~0ULL >> 1;
- }
+ if (perf_event_overflow(event, nmi, &data, regs))
+ power_pmu_stop(event, 0);
}
-
- write_pmc(event->hw.idx, val);
- local64_set(&event->hw.prev_count, val);
- local64_set(&event->hw.period_left, left);
- perf_event_update_userpage(event);
}
/*