perf: Optimize perf_output_*() by avoiding local_xchg()
Since the x86 XCHG instruction implies LOCK, avoid its use by
using a sequence count instead.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 1f98c78..7e3bcf1 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2917,6 +2917,7 @@
preempt_disable();
local_inc(&data->nest);
+ handle->wakeup = local_read(&data->wakeup);
}
static void perf_output_put_handle(struct perf_output_handle *handle)
@@ -2950,7 +2951,7 @@
goto again;
}
- if (local_xchg(&data->wakeup, 0))
+ if (handle->wakeup != local_read(&data->wakeup))
perf_output_wakeup(handle);
preempt_enable();