perf_counter: add more context information
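Add per-context counts (hv, kernel, user) to the callchain entry so that
consumers can tell which IPs belong to which context. The entry header grows
from one u64 to four u32 fields (two u64 words), so MAX_STACK_DEPTH drops by
one and the emitted callchain size accounts for two header words.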
  -----
   | |  hv
   | --
nr | |  kernel
   | --
   | |  user
  -----
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Orig-LKML-Reference: <20090402091319.493101305@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 2a946a1..c74e20d 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1088,6 +1088,7 @@
{
unsigned long bp;
char *stack;
+ int nr = entry->nr;
callchain_store(entry, instruction_pointer(regs));
@@ -1099,6 +1100,8 @@
#endif
dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
+
+ entry->kernel = entry->nr - nr;
}
@@ -1128,6 +1131,7 @@
{
struct stack_frame frame;
const void __user *fp;
+ int nr = entry->nr;
regs = (struct pt_regs *)current->thread.sp0 - 1;
fp = (void __user *)regs->bp;
@@ -1147,6 +1151,8 @@
callchain_store(entry, frame.return_address);
fp = frame.next_fp;
}
+
+ entry->user = entry->nr - nr;
}
static void
@@ -1182,6 +1188,9 @@
entry = &__get_cpu_var(irq_entry);
entry->nr = 0;
+ entry->hv = 0;
+ entry->kernel = 0;
+ entry->user = 0;
perf_do_callchain(regs, entry);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 5428ba1..90cce0c 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -513,10 +513,10 @@
extern void perf_counter_munmap(unsigned long addr, unsigned long len,
unsigned long pgoff, struct file *file);
-#define MAX_STACK_DEPTH 255
+#define MAX_STACK_DEPTH 254
struct perf_callchain_entry {
- u64 nr;
+ u32 nr, hv, kernel, user;
u64 ip[MAX_STACK_DEPTH];
};
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 9bcab10..f105a6e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1819,7 +1819,7 @@
callchain = perf_callchain(regs);
if (callchain) {
- callchain_size = (1 + callchain->nr) * sizeof(u64);
+ callchain_size = (2 + callchain->nr) * sizeof(u64);
header.type |= __PERF_EVENT_CALLCHAIN;
header.size += callchain_size;