perf: Optimize perf_output_*() by avoiding local_xchg()

Since the x86 XCHG instruction implies LOCK, avoid its use by
using a sequence count instead.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ce76676..fe50347 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -804,6 +804,7 @@
 	struct perf_mmap_data		*data;
 	unsigned long			head;
 	unsigned long			offset;
+	unsigned long			wakeup;
 	int				nmi;
 	int				sample;
 };