blob: 995111dee7fba4ffe808ad0f99641d8d2235fb3d [file] [log] [blame]
Ingo Molnare0143ba2009-03-23 21:29:59 +01001/*
2 * kerneltop.c: show top kernel functions - performance counters showcase
3
4 Build with:
5
Paul Mackerrascbe46552009-03-24 16:52:34 +11006 cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
Ingo Molnare0143ba2009-03-23 21:29:59 +01007
8 Sample output:
9
10------------------------------------------------------------------------------
11 KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2)
12------------------------------------------------------------------------------
13
14 weight RIP kernel function
15 ______ ________________ _______________
16
17 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
18 33.00 - ffffffff804cb740 : sock_alloc_send_skb
19 31.26 - ffffffff804ce808 : skb_push
20 22.43 - ffffffff80510004 : tcp_established_options
21 19.00 - ffffffff8027d250 : find_get_page
22 15.76 - ffffffff804e4fc9 : eth_type_trans
23 15.20 - ffffffff804d8baa : dst_release
24 14.86 - ffffffff804cf5d8 : skb_release_head_state
25 14.00 - ffffffff802217d5 : read_hpet
26 12.00 - ffffffff804ffb7f : __ip_local_out
27 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
28 8.54 - ffffffff805001a3 : ip_queue_xmit
Ingo Molnare0143ba2009-03-23 21:29:59 +010029 */
Wu Fengguangf7524bd2009-03-20 10:08:06 +080030
31/*
32 * perfstat: /usr/bin/time -alike performance counter statistics utility
33
34 It summarizes the counter events of all tasks (and child tasks),
35 covering all CPUs that the command (or workload) executes on.
36 It only counts the per-task events of the workload started,
37 independent of how many other tasks run on those CPUs.
38
39 Sample output:
40
41 $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
42
43 Performance counter stats for 'ls':
44
45 163516953 instructions
46 2295 cache-misses
47 2855182 branch-misses
48 */
49
50 /*
51 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
52 *
53 * Improvements and fixes by:
54 *
55 * Arjan van de Ven <arjan@linux.intel.com>
56 * Yanmin Zhang <yanmin.zhang@intel.com>
57 * Wu Fengguang <fengguang.wu@intel.com>
58 * Mike Galbraith <efault@gmx.de>
Paul Mackerrascbe46552009-03-24 16:52:34 +110059 * Paul Mackerras <paulus@samba.org>
Wu Fengguangf7524bd2009-03-20 10:08:06 +080060 *
61 * Released under the GPL v2. (and only v2, not any later version)
62 */
63
Ingo Molnare0143ba2009-03-23 21:29:59 +010064#define _GNU_SOURCE
65#include <sys/types.h>
66#include <sys/stat.h>
67#include <sys/time.h>
68#include <unistd.h>
69#include <stdint.h>
70#include <stdlib.h>
71#include <string.h>
Paul Mackerrascbe46552009-03-24 16:52:34 +110072#include <limits.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010073#include <getopt.h>
74#include <assert.h>
75#include <fcntl.h>
76#include <stdio.h>
77#include <errno.h>
78#include <ctype.h>
79#include <time.h>
80
Ingo Molnare0143ba2009-03-23 21:29:59 +010081#include <sys/syscall.h>
82#include <sys/ioctl.h>
83#include <sys/poll.h>
84#include <sys/prctl.h>
85#include <sys/wait.h>
86#include <sys/uio.h>
Peter Zijlstrabcbcb372009-03-23 18:22:12 +010087#include <sys/mman.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010088
89#include <linux/unistd.h>
Paul Mackerrascbe46552009-03-24 16:52:34 +110090#include <linux/types.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010091
Ingo Molnar383c5f82009-03-23 21:49:25 +010092#include "../../include/linux/perf_counter.h"
Ingo Molnare0143ba2009-03-23 21:29:59 +010093
Wu Fengguangf7524bd2009-03-20 10:08:06 +080094
Peter Zijlstra803d4f32009-03-23 18:22:11 +010095/*
96 * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
97 * counters in the current task.
98 */
99#define PR_TASK_PERF_COUNTERS_DISABLE 31
100#define PR_TASK_PERF_COUNTERS_ENABLE 32
101
102#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
103
104#define rdclock() \
105({ \
106 struct timespec ts; \
107 \
108 clock_gettime(CLOCK_MONOTONIC, &ts); \
109 ts.tv_sec * 1000000000ULL + ts.tv_nsec; \
110})
111
112/*
113 * Pick up some kernel type conventions:
114 */
115#define __user
116#define asmlinkage
117
/*
 * Per-architecture syscall number and low-level primitives.
 *
 *   rmb()       - expands to the architecture's fence instruction
 *                 (lfence / sync) with a compiler memory clobber
 *   cpu_relax() - expands to a pause/no-op instruction with a compiler
 *                 memory clobber, for use inside polling loops
 *
 * None of the macros carries a trailing semicolon: the caller supplies it,
 * so "if (x) cpu_relax(); else ..." parses as a single statement.
 */
#ifdef __x86_64__
#define __NR_perf_counter_open	295
#define rmb()			asm volatile("lfence" ::: "memory")
#define cpu_relax()		asm volatile("rep; nop" ::: "memory")
#endif

#ifdef __i386__
#define __NR_perf_counter_open	333
#define rmb()			asm volatile("lfence" ::: "memory")
#define cpu_relax()		asm volatile("rep; nop" ::: "memory")
#endif

#ifdef __powerpc__
#define __NR_perf_counter_open	319
#define rmb()			asm volatile ("sync" ::: "memory")
#define cpu_relax()		asm volatile ("" ::: "memory")
#endif
135
Peter Zijlstrabcbcb372009-03-23 18:22:12 +0100136#define unlikely(x) __builtin_expect(!!(x), 0)
Peter Zijlstra00f0ad72009-03-25 12:30:27 +0100137#define min(x, y) ({ \
138 typeof(x) _min1 = (x); \
139 typeof(y) _min2 = (y); \
140 (void) (&_min1 == &_min2); \
141 _min1 < _min2 ? _min1 : _min2; })
Peter Zijlstrabcbcb372009-03-23 18:22:12 +0100142
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100143asmlinkage int sys_perf_counter_open(
144 struct perf_counter_hw_event *hw_event_uptr __user,
145 pid_t pid,
146 int cpu,
147 int group_fd,
148 unsigned long flags)
149{
Paul Mackerrascbe46552009-03-24 16:52:34 +1100150 return syscall(
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100151 __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100152}
153
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800154#define MAX_COUNTERS 64
155#define MAX_NR_CPUS 256
156
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100157#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800158
159static int run_perfstat = 0;
160static int system_wide = 0;
161
162static int nr_counters = 0;
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100163static __u64 event_id[MAX_COUNTERS] = {
164 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
Paul Mackerrascbe46552009-03-24 16:52:34 +1100165 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100166 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
167 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
168
169 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
170 EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
171 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
172 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
173};
174static int default_interval = 100000;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800175static int event_count[MAX_COUNTERS];
176static int fd[MAX_NR_CPUS][MAX_COUNTERS];
Ingo Molnare0143ba2009-03-23 21:29:59 +0100177
Ingo Molnare0143ba2009-03-23 21:29:59 +0100178static __u64 count_filter = 100;
179
Ingo Molnare0143ba2009-03-23 21:29:59 +0100180static int tid = -1;
181static int profile_cpu = -1;
182static int nr_cpus = 0;
183static int nmi = 1;
184static int group = 0;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +0100185static unsigned int page_size;
Peter Zijlstra00f0ad72009-03-25 12:30:27 +0100186static unsigned int mmap_pages = 16;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +0200187static int use_mmap = 0;
188static int use_munmap = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100189
190static char *vmlinux;
191
192static char *sym_filter;
193static unsigned long filter_start;
194static unsigned long filter_end;
195
196static int delay_secs = 2;
197static int zero;
198static int dump_symtab;
199
200struct source_line {
201 uint64_t EIP;
202 unsigned long count;
203 char *line;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100204 struct source_line *next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100205};
206
Paul Mackerrascbe46552009-03-24 16:52:34 +1100207static struct source_line *lines;
208static struct source_line **lines_tail;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800209
210const unsigned int default_count[] = {
Ingo Molnar81cdbe02009-03-23 22:29:50 +0100211 1000000,
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800212 1000000,
213 10000,
214 10000,
215 1000000,
216 10000,
217};
218
219static char *hw_event_names[] = {
220 "CPU cycles",
221 "instructions",
222 "cache references",
223 "cache misses",
224 "branches",
225 "branch misses",
226 "bus cycles",
227};
228
229static char *sw_event_names[] = {
230 "cpu clock ticks",
231 "task clock ticks",
232 "pagefaults",
233 "context switches",
234 "CPU migrations",
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100235 "minor faults",
236 "major faults",
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800237};
238
239struct event_symbol {
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100240 __u64 event;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800241 char *symbol;
242};
243
244static struct event_symbol event_symbols[] = {
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100245 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
246 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
247 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
248 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
249 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
250 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
251 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
252 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
253 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },
254
255 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
256 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
257 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
258 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
259 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
260 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
261 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
262 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
263 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
264 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800265};
266
/*
 * Extract one bit-field from a counter config word: mask the value with
 * PERF_COUNTER_<name>_MASK and shift it down by PERF_COUNTER_<name>_SHIFT.
 *
 * The argument is parenthesized so that compound expressions such as
 * "a | b" are masked as a whole instead of being split by operator
 * precedence.
 */
#define __PERF_COUNTER_FIELD(config, name) \
	(((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
274
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800275static void display_events_help(void)
276{
277 unsigned int i;
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100278 __u64 e;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800279
280 printf(
281 " -e EVENT --event=EVENT # symbolic-name abbreviations");
282
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100283 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
284 int type, id;
285
286 e = event_symbols[i].event;
287 type = PERF_COUNTER_TYPE(e);
288 id = PERF_COUNTER_ID(e);
289
290 printf("\n %d:%d: %-20s",
291 type, id, event_symbols[i].symbol);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800292 }
293
294 printf("\n"
295 " rNNN: raw PMU events (eventsel+umask)\n\n");
296}
297
298static void display_perfstat_help(void)
299{
300 printf(
301 "Usage: perfstat [<events...>] <cmd...>\n\n"
302 "PerfStat Options (up to %d event types can be specified):\n\n",
303 MAX_COUNTERS);
304
305 display_events_help();
306
307 printf(
308 " -a # system-wide collection\n");
309 exit(0);
310}
Ingo Molnare0143ba2009-03-23 21:29:59 +0100311
312static void display_help(void)
313{
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800314 if (run_perfstat)
315 return display_perfstat_help();
316
Ingo Molnare0143ba2009-03-23 21:29:59 +0100317 printf(
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800318 "Usage: kerneltop [<options>]\n"
319 " Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100320 "KernelTop Options (up to %d event types can be specified at once):\n\n",
321 MAX_COUNTERS);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800322
323 display_events_help();
324
Ingo Molnare0143ba2009-03-23 21:29:59 +0100325 printf(
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800326 " -S --stat # perfstat COMMAND\n"
327 " -a # system-wide collection (for perfstat)\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100328 " -c CNT --count=CNT # event period to sample\n\n"
329 " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n"
330 " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n"
331 " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800332 " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100333 " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800334 " -x path --vmlinux=<path> # the vmlinux binary, required for -s use\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100335 " -z --zero # zero counts after display\n"
336 " -D --dump_symtab # dump symbol table to stderr on startup\n"
Peter Zijlstra4c4ba212009-03-25 12:30:26 +0100337 " -m pages --mmap_pages=<pages> # number of mmap data pages\n"
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +0200338 " -M --mmap_info # print mmap info stream\n"
339 " -U --munmap_info # print munmap info stream\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800340 );
Ingo Molnare0143ba2009-03-23 21:29:59 +0100341
342 exit(0);
343}
344
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800345static char *event_name(int ctr)
346{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100347 __u64 config = event_id[ctr];
348 int type = PERF_COUNTER_TYPE(config);
349 int id = PERF_COUNTER_ID(config);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800350 static char buf[32];
351
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100352 if (PERF_COUNTER_RAW(config)) {
353 sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800354 return buf;
355 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800356
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100357 switch (type) {
358 case PERF_TYPE_HARDWARE:
359 if (id < PERF_HW_EVENTS_MAX)
360 return hw_event_names[id];
361 return "unknown-hardware";
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800362
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100363 case PERF_TYPE_SOFTWARE:
364 if (id < PERF_SW_EVENTS_MAX)
365 return sw_event_names[id];
366 return "unknown-software";
367
368 default:
369 break;
370 }
371
372 return "unknown";
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800373}
374
375/*
376 * Each event can have multiple symbolic names.
377 * Symbolic names are (almost) exactly matched.
378 */
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100379static __u64 match_event_symbols(char *str)
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800380{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100381 __u64 config, id;
382 int type;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800383 unsigned int i;
384
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100385 if (sscanf(str, "r%llx", &config) == 1)
386 return config | PERF_COUNTER_RAW_MASK;
387
388 if (sscanf(str, "%d:%llu", &type, &id) == 2)
389 return EID(type, id);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800390
391 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
392 if (!strncmp(str, event_symbols[i].symbol,
393 strlen(event_symbols[i].symbol)))
394 return event_symbols[i].event;
395 }
396
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100397 return ~0ULL;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800398}
399
400static int parse_events(char *str)
401{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100402 __u64 config;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800403
404again:
405 if (nr_counters == MAX_COUNTERS)
406 return -1;
407
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100408 config = match_event_symbols(str);
409 if (config == ~0ULL)
410 return -1;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800411
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100412 event_id[nr_counters] = config;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800413 nr_counters++;
414
415 str = strstr(str, ",");
416 if (str) {
417 str++;
418 goto again;
419 }
420
421 return 0;
422}
423
424
425/*
426 * perfstat
427 */
428
429char fault_here[1000000];
430
431static void create_perfstat_counter(int counter)
432{
433 struct perf_counter_hw_event hw_event;
434
435 memset(&hw_event, 0, sizeof(hw_event));
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100436 hw_event.config = event_id[counter];
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800437 hw_event.record_type = PERF_RECORD_SIMPLE;
438 hw_event.nmi = 0;
439
440 if (system_wide) {
441 int cpu;
442 for (cpu = 0; cpu < nr_cpus; cpu ++) {
443 fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
444 if (fd[cpu][counter] < 0) {
445 printf("perfstat error: syscall returned with %d (%s)\n",
446 fd[cpu][counter], strerror(errno));
447 exit(-1);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100448 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800449 }
450 } else {
451 hw_event.inherit = 1;
452 hw_event.disabled = 1;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100453
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800454 fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
455 if (fd[0][counter] < 0) {
456 printf("perfstat error: syscall returned with %d (%s)\n",
457 fd[0][counter], strerror(errno));
458 exit(-1);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100459 }
460 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800461}
Ingo Molnare0143ba2009-03-23 21:29:59 +0100462
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800463int do_perfstat(int argc, char *argv[])
464{
465 unsigned long long t0, t1;
466 int counter;
467 ssize_t res;
468 int status;
469 int pid;
470
471 if (!system_wide)
472 nr_cpus = 1;
473
474 for (counter = 0; counter < nr_counters; counter++)
475 create_perfstat_counter(counter);
476
477 argc -= optind;
478 argv += optind;
479
Wu Fengguangaf9522c2009-03-20 10:08:10 +0800480 if (!argc)
481 display_help();
482
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800483 /*
484 * Enable counters and exec the command:
485 */
486 t0 = rdclock();
487 prctl(PR_TASK_PERF_COUNTERS_ENABLE);
488
489 if ((pid = fork()) < 0)
490 perror("failed to fork");
491 if (!pid) {
492 if (execvp(argv[0], argv)) {
493 perror(argv[0]);
494 exit(-1);
495 }
Wu Fengguang95bb3be2009-03-20 10:08:04 +0800496 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800497 while (wait(&status) >= 0)
498 ;
499 prctl(PR_TASK_PERF_COUNTERS_DISABLE);
500 t1 = rdclock();
501
502 fflush(stdout);
503
504 fprintf(stderr, "\n");
505 fprintf(stderr, " Performance counter stats for \'%s\':\n",
506 argv[0]);
507 fprintf(stderr, "\n");
Ingo Molnare0143ba2009-03-23 21:29:59 +0100508
509 for (counter = 0; counter < nr_counters; counter++) {
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800510 int cpu;
511 __u64 count, single_count;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100512
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800513 count = 0;
514 for (cpu = 0; cpu < nr_cpus; cpu ++) {
515 res = read(fd[cpu][counter],
516 (char *) &single_count, sizeof(single_count));
517 assert(res == sizeof(single_count));
518 count += single_count;
519 }
520
Paul Mackerrascbe46552009-03-24 16:52:34 +1100521 if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
522 event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800523
524 double msecs = (double)count / 1000000;
525
526 fprintf(stderr, " %14.6f %-20s (msecs)\n",
527 msecs, event_name(counter));
528 } else {
529 fprintf(stderr, " %14Ld %-20s (events)\n",
530 count, event_name(counter));
531 }
Ingo Molnare0143ba2009-03-23 21:29:59 +0100532 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800533 fprintf(stderr, "\n");
534 fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
535 (double)(t1-t0)/1e6);
536 fprintf(stderr, "\n");
537
538 return 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100539}
540
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800541/*
542 * Symbols
543 */
544
Ingo Molnare0143ba2009-03-23 21:29:59 +0100545static uint64_t min_ip;
546static uint64_t max_ip = -1ll;
547
548struct sym_entry {
549 unsigned long long addr;
550 char *sym;
551 unsigned long count[MAX_COUNTERS];
552 int skip;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100553 struct source_line *source;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100554};
555
556#define MAX_SYMS 100000
557
558static int sym_table_count;
559
560struct sym_entry *sym_filter_entry;
561
562static struct sym_entry sym_table[MAX_SYMS];
563
564static void show_details(struct sym_entry *sym);
565
566/*
Wu Fengguangef45fa92009-03-20 10:08:07 +0800567 * Ordering weight: count-1 * count-2 * ... / count-n
Ingo Molnare0143ba2009-03-23 21:29:59 +0100568 */
569static double sym_weight(const struct sym_entry *sym)
570{
571 double weight;
572 int counter;
573
574 weight = sym->count[0];
575
576 for (counter = 1; counter < nr_counters-1; counter++)
577 weight *= sym->count[counter];
578
579 weight /= (sym->count[counter] + 1);
580
581 return weight;
582}
583
/*
 * qsort() comparator ordering sym_entry records by descending sym_weight().
 *
 * Returns a negative, zero or positive value as qsort() requires; a
 * comparator that only ever reports 0 or 1 is not a consistent ordering
 * and leaves the sort result unspecified.
 */
static int compare(const void *__sym1, const void *__sym2)
{
	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
	double w1 = sym_weight(sym1);
	double w2 = sym_weight(sym2);

	if (w1 > w2)
		return -1;
	if (w1 < w2)
		return 1;
	return 0;
}
590
591static time_t last_refresh;
592static long events;
593static long userspace_events;
/*
 * Escape sequence written to stdout before every refresh: cursor home
 * (ESC [ H) followed by erase display (ESC [ 2 J).  Spelled with octal
 * escapes so the control bytes survive editors and web viewers.
 */
static const char CONSOLE_CLEAR[] = "\033[H\033[2J";
595
596static struct sym_entry tmp[MAX_SYMS];
597
598static void print_sym_table(void)
599{
600 int i, printed;
601 int counter;
602 float events_per_sec = events/delay_secs;
603 float kevents_per_sec = (events-userspace_events)/delay_secs;
604
605 memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count);
606 qsort(tmp, sym_table_count, sizeof(tmp[0]), compare);
607
608 write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
609
610 printf(
611"------------------------------------------------------------------------------\n");
612 printf( " KernelTop:%8.0f irqs/sec kernel:%3.1f%% [%s, ",
613 events_per_sec,
614 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)),
615 nmi ? "NMI" : "IRQ");
616
617 if (nr_counters == 1)
618 printf("%d ", event_count[0]);
619
620 for (counter = 0; counter < nr_counters; counter++) {
621 if (counter)
622 printf("/");
623
Wu Fengguange3908612009-03-20 10:08:05 +0800624 printf("%s", event_name(counter));
Ingo Molnare0143ba2009-03-23 21:29:59 +0100625 }
626
627 printf( "], ");
628
629 if (tid != -1)
630 printf(" (tid: %d", tid);
631 else
632 printf(" (all");
633
634 if (profile_cpu != -1)
635 printf(", cpu: %d)\n", profile_cpu);
636 else {
637 if (tid != -1)
638 printf(")\n");
639 else
640 printf(", %d CPUs)\n", nr_cpus);
641 }
642
643 printf("------------------------------------------------------------------------------\n\n");
644
645 if (nr_counters == 1)
646 printf(" events");
647 else
648 printf(" weight events");
649
650 printf(" RIP kernel function\n"
651 " ______ ______ ________________ _______________\n\n"
652 );
653
654 printed = 0;
655 for (i = 0; i < sym_table_count; i++) {
656 int count;
657
658 if (nr_counters == 1) {
659 if (printed <= 18 &&
660 tmp[i].count[0] >= count_filter) {
661 printf("%19.2f - %016llx : %s\n",
662 sym_weight(tmp + i), tmp[i].addr, tmp[i].sym);
663 printed++;
664 }
665 } else {
666 if (printed <= 18 &&
667 tmp[i].count[0] >= count_filter) {
668 printf("%8.1f %10ld - %016llx : %s\n",
669 sym_weight(tmp + i),
670 tmp[i].count[0],
671 tmp[i].addr, tmp[i].sym);
672 printed++;
673 }
674 }
675 /*
676 * Add decay to the counts:
677 */
678 for (count = 0; count < nr_counters; count++)
679 sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8;
680 }
681
682 if (sym_filter_entry)
683 show_details(sym_filter_entry);
684
685 last_refresh = time(NULL);
686
687 {
688 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
689
690 if (poll(&stdin_poll, 1, 0) == 1) {
691 printf("key pressed - exiting.\n");
692 exit(0);
693 }
694 }
695}
696
697static int read_symbol(FILE *in, struct sym_entry *s)
698{
699 static int filter_match = 0;
700 char *sym, stype;
701 char str[500];
702 int rc, pos;
703
704 rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str);
705 if (rc == EOF)
706 return -1;
707
708 assert(rc == 3);
709
710 /* skip until end of line: */
711 pos = strlen(str);
712 do {
713 rc = fgetc(in);
714 if (rc == '\n' || rc == EOF || pos >= 499)
715 break;
716 str[pos] = rc;
717 pos++;
718 } while (1);
719 str[pos] = 0;
720
721 sym = str;
722
723 /* Filter out known duplicates and non-text symbols. */
724 if (!strcmp(sym, "_text"))
725 return 1;
726 if (!min_ip && !strcmp(sym, "_stext"))
727 return 1;
728 if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext"))
729 return 1;
730 if (stype != 'T' && stype != 't')
731 return 1;
732 if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14))
733 return 1;
734 if (strstr(sym, "_text_start") || strstr(sym, "_text_end"))
735 return 1;
736
737 s->sym = malloc(strlen(str));
738 assert(s->sym);
739
740 strcpy((char *)s->sym, str);
741 s->skip = 0;
742
743 /* Tag events to be skipped. */
744 if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
745 s->skip = 1;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +0100746 else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
747 s->skip = 1;
748 else if (!strcmp("mwait_idle", s->sym))
Ingo Molnare0143ba2009-03-23 21:29:59 +0100749 s->skip = 1;
750
751 if (filter_match == 1) {
752 filter_end = s->addr;
753 filter_match = -1;
754 if (filter_end - filter_start > 10000) {
755 printf("hm, too large filter symbol <%s> - skipping.\n",
756 sym_filter);
757 printf("symbol filter start: %016lx\n", filter_start);
758 printf(" end: %016lx\n", filter_end);
759 filter_end = filter_start = 0;
760 sym_filter = NULL;
761 sleep(1);
762 }
763 }
764 if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) {
765 filter_match = 1;
766 filter_start = s->addr;
767 }
768
769 return 0;
770}
771
772int compare_addr(const void *__sym1, const void *__sym2)
773{
774 const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
775
776 return sym1->addr > sym2->addr;
777}
778
779static void sort_symbol_table(void)
780{
781 int i, dups;
782
783 do {
784 qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr);
785 for (i = 0, dups = 0; i < sym_table_count; i++) {
786 if (sym_table[i].addr == sym_table[i+1].addr) {
787 sym_table[i+1].addr = -1ll;
788 dups++;
789 }
790 }
791 sym_table_count -= dups;
792 } while(dups);
793}
794
795static void parse_symbols(void)
796{
797 struct sym_entry *last;
798
799 FILE *kallsyms = fopen("/proc/kallsyms", "r");
800
801 if (!kallsyms) {
802 printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n");
803 exit(-1);
804 }
805
806 while (!feof(kallsyms)) {
807 if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) {
808 sym_table_count++;
809 assert(sym_table_count <= MAX_SYMS);
810 }
811 }
812
813 sort_symbol_table();
814 min_ip = sym_table[0].addr;
815 max_ip = sym_table[sym_table_count-1].addr;
816 last = sym_table + sym_table_count++;
817
818 last->addr = -1ll;
819 last->sym = "<end>";
820
821 if (filter_end) {
822 int count;
823 for (count=0; count < sym_table_count; count ++) {
824 if (!strcmp(sym_table[count].sym, sym_filter)) {
825 sym_filter_entry = &sym_table[count];
826 break;
827 }
828 }
829 }
830 if (dump_symtab) {
831 int i;
832
833 for (i = 0; i < sym_table_count; i++)
834 fprintf(stderr, "%llx %s\n",
835 sym_table[i].addr, sym_table[i].sym);
836 }
837}
838
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800839/*
840 * Source lines
841 */
Ingo Molnare0143ba2009-03-23 21:29:59 +0100842
843static void parse_vmlinux(char *filename)
844{
845 FILE *file;
846 char command[PATH_MAX*2];
847 if (!filename)
848 return;
849
850 sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename);
851
852 file = popen(command, "r");
853 if (!file)
854 return;
855
Paul Mackerrascbe46552009-03-24 16:52:34 +1100856 lines_tail = &lines;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100857 while (!feof(file)) {
858 struct source_line *src;
859 size_t dummy = 0;
860 char *c;
861
862 src = malloc(sizeof(struct source_line));
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800863 assert(src != NULL);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100864 memset(src, 0, sizeof(struct source_line));
865
866 if (getline(&src->line, &dummy, file) < 0)
867 break;
868 if (!src->line)
869 break;
870
871 c = strchr(src->line, '\n');
872 if (c)
873 *c = 0;
874
Paul Mackerrascbe46552009-03-24 16:52:34 +1100875 src->next = NULL;
876 *lines_tail = src;
877 lines_tail = &src->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100878
879 if (strlen(src->line)>8 && src->line[8] == ':')
880 src->EIP = strtoull(src->line, NULL, 16);
881 if (strlen(src->line)>8 && src->line[16] == ':')
882 src->EIP = strtoull(src->line, NULL, 16);
883 }
884 pclose(file);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100885}
886
887static void record_precise_ip(uint64_t ip)
888{
889 struct source_line *line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100890
Paul Mackerrascbe46552009-03-24 16:52:34 +1100891 for (line = lines; line; line = line->next) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100892 if (line->EIP == ip)
893 line->count++;
894 if (line->EIP > ip)
895 break;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100896 }
897}
898
899static void lookup_sym_in_vmlinux(struct sym_entry *sym)
900{
901 struct source_line *line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100902 char pattern[PATH_MAX];
903 sprintf(pattern, "<%s>:", sym->sym);
904
Paul Mackerrascbe46552009-03-24 16:52:34 +1100905 for (line = lines; line; line = line->next) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100906 if (strstr(line->line, pattern)) {
Paul Mackerrascbe46552009-03-24 16:52:34 +1100907 sym->source = line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100908 break;
909 }
Ingo Molnare0143ba2009-03-23 21:29:59 +0100910 }
911}
912
Paul Mackerrascbe46552009-03-24 16:52:34 +1100913static void show_lines(struct source_line *line_queue, int line_queue_count)
Ingo Molnare0143ba2009-03-23 21:29:59 +0100914{
915 int i;
916 struct source_line *line;
917
Paul Mackerrascbe46552009-03-24 16:52:34 +1100918 line = line_queue;
919 for (i = 0; i < line_queue_count; i++) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100920 printf("%8li\t%s\n", line->count, line->line);
Paul Mackerrascbe46552009-03-24 16:52:34 +1100921 line = line->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100922 }
923}
924
925#define TRACE_COUNT 3
926
927static void show_details(struct sym_entry *sym)
928{
929 struct source_line *line;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100930 struct source_line *line_queue = NULL;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100931 int displayed = 0;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100932 int line_queue_count = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100933
934 if (!sym->source)
935 lookup_sym_in_vmlinux(sym);
936 if (!sym->source)
937 return;
938
939 printf("Showing details for %s\n", sym->sym);
940
Paul Mackerrascbe46552009-03-24 16:52:34 +1100941 line = sym->source;
942 while (line) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100943 if (displayed && strstr(line->line, ">:"))
944 break;
945
Paul Mackerrascbe46552009-03-24 16:52:34 +1100946 if (!line_queue_count)
947 line_queue = line;
948 line_queue_count ++;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100949
950 if (line->count >= count_filter) {
Paul Mackerrascbe46552009-03-24 16:52:34 +1100951 show_lines(line_queue, line_queue_count);
952 line_queue_count = 0;
953 line_queue = NULL;
954 } else if (line_queue_count > TRACE_COUNT) {
955 line_queue = line_queue->next;
956 line_queue_count --;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100957 }
958
959 line->count = 0;
960 displayed++;
961 if (displayed > 300)
962 break;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100963 line = line->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100964 }
965}
966
967/*
968 * Binary search in the histogram table and record the hit:
969 */
970static void record_ip(uint64_t ip, int counter)
971{
972 int left_idx, middle_idx, right_idx, idx;
973 unsigned long left, middle, right;
974
975 record_precise_ip(ip);
976
977 left_idx = 0;
978 right_idx = sym_table_count-1;
979 assert(ip <= max_ip && ip >= min_ip);
980
981 while (left_idx + 1 < right_idx) {
982 middle_idx = (left_idx + right_idx) / 2;
983
984 left = sym_table[ left_idx].addr;
985 middle = sym_table[middle_idx].addr;
986 right = sym_table[ right_idx].addr;
987
988 if (!(left <= middle && middle <= right)) {
989 printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right);
990 printf("%d %d %d\n", left_idx, middle_idx, right_idx);
991 }
992 assert(left <= middle && middle <= right);
993 if (!(left <= ip && ip <= right)) {
994 printf(" left: %016lx\n", left);
Ingo Molnar193e8df2009-03-23 22:23:16 +0100995 printf(" ip: %016lx\n", (unsigned long)ip);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100996 printf("right: %016lx\n", right);
997 }
998 assert(left <= ip && ip <= right);
999 /*
1000 * [ left .... target .... middle .... right ]
1001 * => right := middle
1002 */
1003 if (ip < middle) {
1004 right_idx = middle_idx;
1005 continue;
1006 }
1007 /*
1008 * [ left .... middle ... target ... right ]
1009 * => left := middle
1010 */
1011 left_idx = middle_idx;
1012 }
1013
1014 idx = left_idx;
1015
1016 if (!sym_table[idx].skip)
1017 sym_table[idx].count[counter]++;
1018 else events--;
1019}
1020
1021static void process_event(uint64_t ip, int counter)
1022{
1023 events++;
1024
1025 if (ip < min_ip || ip > max_ip) {
1026 userspace_events++;
1027 return;
1028 }
1029
1030 record_ip(ip, counter);
1031}
1032
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001033static void process_options(int argc, char *argv[])
1034{
1035 int error = 0, counter;
1036
1037 if (strstr(argv[0], "perfstat"))
1038 run_perfstat = 1;
1039
1040 for (;;) {
1041 int option_index = 0;
1042 /** Options for getopt */
1043 static struct option long_options[] = {
1044 {"count", required_argument, NULL, 'c'},
1045 {"cpu", required_argument, NULL, 'C'},
1046 {"delay", required_argument, NULL, 'd'},
1047 {"dump_symtab", no_argument, NULL, 'D'},
1048 {"event", required_argument, NULL, 'e'},
1049 {"filter", required_argument, NULL, 'f'},
1050 {"group", required_argument, NULL, 'g'},
1051 {"help", no_argument, NULL, 'h'},
1052 {"nmi", required_argument, NULL, 'n'},
1053 {"pid", required_argument, NULL, 'p'},
1054 {"vmlinux", required_argument, NULL, 'x'},
1055 {"symbol", required_argument, NULL, 's'},
1056 {"stat", no_argument, NULL, 'S'},
1057 {"zero", no_argument, NULL, 'z'},
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001058 {"mmap_pages", required_argument, NULL, 'm'},
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001059 {"mmap_info", no_argument, NULL, 'M'},
1060 {"munmap_info", no_argument, NULL, 'U'},
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001061 {NULL, 0, NULL, 0 }
1062 };
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001063 int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:m:p:s:Sx:zMU",
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001064 long_options, &option_index);
1065 if (c == -1)
1066 break;
1067
1068 switch (c) {
1069 case 'a': system_wide = 1; break;
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001070 case 'c': default_interval = atoi(optarg); break;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001071 case 'C':
1072 /* CPU and PID are mutually exclusive */
1073 if (tid != -1) {
1074 printf("WARNING: CPU switch overriding PID\n");
1075 sleep(1);
1076 tid = -1;
1077 }
1078 profile_cpu = atoi(optarg); break;
1079 case 'd': delay_secs = atoi(optarg); break;
1080 case 'D': dump_symtab = 1; break;
1081
1082 case 'e': error = parse_events(optarg); break;
1083
1084 case 'f': count_filter = atoi(optarg); break;
1085 case 'g': group = atoi(optarg); break;
1086 case 'h': display_help(); break;
1087 case 'n': nmi = atoi(optarg); break;
1088 case 'p':
1089 /* CPU and PID are mutually exclusive */
1090 if (profile_cpu != -1) {
1091 printf("WARNING: PID switch overriding CPU\n");
1092 sleep(1);
1093 profile_cpu = -1;
1094 }
1095 tid = atoi(optarg); break;
1096 case 's': sym_filter = strdup(optarg); break;
1097 case 'S': run_perfstat = 1; break;
1098 case 'x': vmlinux = strdup(optarg); break;
1099 case 'z': zero = 1; break;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001100 case 'm': mmap_pages = atoi(optarg); break;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001101 case 'M': use_mmap = 1; break;
1102 case 'U': use_munmap = 1; break;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001103 default: error = 1; break;
1104 }
1105 }
1106 if (error)
1107 display_help();
1108
1109 if (!nr_counters) {
1110 if (run_perfstat)
1111 nr_counters = 8;
1112 else {
1113 nr_counters = 1;
1114 event_id[0] = 0;
1115 }
1116 }
1117
1118 for (counter = 0; counter < nr_counters; counter++) {
1119 if (event_count[counter])
1120 continue;
1121
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001122 event_count[counter] = default_interval;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001123 }
1124}
1125
/*
 * Reader-side state for one mmap()ed counter buffer.
 */
struct mmap_data {
	int		counter;	/* counter index passed on to process_event() */
	void		*base;		/* start of the mapping; event data begins one page in */
	unsigned int	mask,		/* size of the data area in bytes, minus one */
			prev;		/* position up to which data has been consumed */
};
1132
1133static unsigned int mmap_read_head(struct mmap_data *md)
1134{
1135 struct perf_counter_mmap_page *pc = md->base;
Peter Zijlstra19556432009-03-30 19:07:04 +02001136 int head;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001137
1138 head = pc->data_head;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001139 rmb();
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001140
1141 return head;
1142}
1143
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001144struct timeval last_read, this_read;
1145
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001146static void mmap_read(struct mmap_data *md)
1147{
1148 unsigned int head = mmap_read_head(md);
1149 unsigned int old = md->prev;
1150 unsigned char *data = md->base + page_size;
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001151 int diff;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001152
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001153 gettimeofday(&this_read, NULL);
1154
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001155 /*
1156 * If we're further behind than half the buffer, there's a chance
1157 * the writer will bite our tail and screw up the events under us.
1158 *
1159 * If we somehow ended up ahead of the head, we got messed up.
1160 *
1161 * In either case, truncate and restart at head.
1162 */
1163 diff = head - old;
1164 if (diff > md->mask / 2 || diff < 0) {
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001165 struct timeval iv;
1166 unsigned long msecs;
1167
1168 timersub(&this_read, &last_read, &iv);
1169 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
1170
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001171 fprintf(stderr, "WARNING: failed to keep up with mmap data."
1172 " Last read %lu msecs ago.\n", msecs);
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001173
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001174 /*
1175 * head points to a known good entry, start there.
1176 */
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001177 old = head;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001178 }
1179
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001180 last_read = this_read;
1181
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001182 for (; old != head;) {
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001183 struct ip_event {
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001184 struct perf_event_header header;
1185 __u64 ip;
1186 __u32 pid, tid;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001187 };
1188 struct mmap_event {
1189 struct perf_event_header header;
1190 __u32 pid, tid;
1191 __u64 start;
1192 __u64 len;
1193 __u64 pgoff;
1194 char filename[PATH_MAX];
1195 };
1196
1197 typedef union event_union {
1198 struct perf_event_header header;
1199 struct ip_event ip;
1200 struct mmap_event mmap;
1201 } event_t;
1202
1203 event_t *event = (event_t *)&data[old & md->mask];
1204
1205 event_t event_copy;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001206
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001207 unsigned int size = event->header.size;
1208
1209 /*
1210 * Event straddles the mmap boundary -- header should always
1211 * be inside due to u64 alignment of output.
1212 */
1213 if ((old & md->mask) + size != ((old + size) & md->mask)) {
1214 unsigned int offset = old;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001215 unsigned int len = min(sizeof(*event), size), cpy;
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001216 void *dst = &event_copy;
1217
1218 do {
1219 cpy = min(md->mask + 1 - (offset & md->mask), len);
1220 memcpy(dst, &data[offset & md->mask], cpy);
1221 offset += cpy;
1222 dst += cpy;
1223 len -= cpy;
1224 } while (len);
1225
1226 event = &event_copy;
1227 }
1228
1229 old += size;
1230
1231 switch (event->header.type) {
1232 case PERF_EVENT_IP:
1233 case PERF_EVENT_IP | __PERF_EVENT_TID:
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001234 process_event(event->ip.ip, md->counter);
1235 break;
1236
1237 case PERF_EVENT_MMAP:
1238 case PERF_EVENT_MUNMAP:
1239 printf("%s: %Lu %Lu %Lu %s\n",
1240 event->header.type == PERF_EVENT_MMAP
1241 ? "mmap" : "munmap",
1242 event->mmap.start,
1243 event->mmap.len,
1244 event->mmap.pgoff,
1245 event->mmap.filename);
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001246 break;
1247 }
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001248 }
1249
1250 md->prev = old;
1251}
1252
Ingo Molnare0143ba2009-03-23 21:29:59 +01001253int main(int argc, char *argv[])
1254{
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001255 struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001256 struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
Ingo Molnare0143ba2009-03-23 21:29:59 +01001257 struct perf_counter_hw_event hw_event;
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001258 int i, counter, group_fd, nr_poll = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001259 unsigned int cpu;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001260 int ret;
1261
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001262 page_size = sysconf(_SC_PAGE_SIZE);
1263
Ingo Molnare0143ba2009-03-23 21:29:59 +01001264 process_options(argc, argv);
1265
1266 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001267 assert(nr_cpus <= MAX_NR_CPUS);
1268 assert(nr_cpus >= 0);
1269
1270 if (run_perfstat)
1271 return do_perfstat(argc, argv);
1272
Ingo Molnare0143ba2009-03-23 21:29:59 +01001273 if (tid != -1 || profile_cpu != -1)
1274 nr_cpus = 1;
1275
Paul Mackerrascbe46552009-03-24 16:52:34 +11001276 parse_symbols();
1277 if (vmlinux && sym_filter_entry)
1278 parse_vmlinux(vmlinux);
1279
Ingo Molnare0143ba2009-03-23 21:29:59 +01001280 for (i = 0; i < nr_cpus; i++) {
1281 group_fd = -1;
1282 for (counter = 0; counter < nr_counters; counter++) {
1283
1284 cpu = profile_cpu;
1285 if (tid == -1 && profile_cpu == -1)
1286 cpu = i;
1287
1288 memset(&hw_event, 0, sizeof(hw_event));
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001289 hw_event.config = event_id[counter];
Ingo Molnare0143ba2009-03-23 21:29:59 +01001290 hw_event.irq_period = event_count[counter];
1291 hw_event.record_type = PERF_RECORD_IRQ;
1292 hw_event.nmi = nmi;
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001293 hw_event.include_tid = 1;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001294 hw_event.mmap = use_mmap;
1295 hw_event.munmap = use_munmap;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001296
Ingo Molnare0143ba2009-03-23 21:29:59 +01001297 fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001298 if (fd[i][counter] < 0) {
Paul Mackerrascbe46552009-03-24 16:52:34 +11001299 int err = errno;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001300 printf("kerneltop error: syscall returned with %d (%s)\n",
Paul Mackerrascbe46552009-03-24 16:52:34 +11001301 fd[i][counter], strerror(err));
1302 if (err == EPERM)
Ingo Molnare0143ba2009-03-23 21:29:59 +01001303 printf("Are you root?\n");
1304 exit(-1);
1305 }
1306 assert(fd[i][counter] >= 0);
Paul Mackerrascbe46552009-03-24 16:52:34 +11001307 fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001308
1309 /*
1310 * First counter acts as the group leader:
1311 */
1312 if (group && group_fd == -1)
1313 group_fd = fd[i][counter];
1314
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001315 event_array[nr_poll].fd = fd[i][counter];
1316 event_array[nr_poll].events = POLLIN;
1317 nr_poll++;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001318
1319 mmap_array[i][counter].counter = counter;
1320 mmap_array[i][counter].prev = 0;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001321 mmap_array[i][counter].mask = mmap_pages*page_size - 1;
1322 mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001323 PROT_READ, MAP_SHARED, fd[i][counter], 0);
1324 if (mmap_array[i][counter].base == MAP_FAILED) {
1325 printf("kerneltop error: failed to mmap with %d (%s)\n",
1326 errno, strerror(errno));
1327 exit(-1);
1328 }
Ingo Molnare0143ba2009-03-23 21:29:59 +01001329 }
1330 }
1331
Ingo Molnare0143ba2009-03-23 21:29:59 +01001332 printf("KernelTop refresh period: %d seconds\n", delay_secs);
1333 last_refresh = time(NULL);
1334
1335 while (1) {
1336 int hits = events;
1337
1338 for (i = 0; i < nr_cpus; i++) {
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001339 for (counter = 0; counter < nr_counters; counter++)
1340 mmap_read(&mmap_array[i][counter]);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001341 }
1342
1343 if (time(NULL) >= last_refresh + delay_secs) {
1344 print_sym_table();
1345 events = userspace_events = 0;
1346 }
1347
1348 if (hits == events)
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001349 ret = poll(event_array, nr_poll, 1000);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001350 hits = events;
1351 }
1352
1353 return 0;
1354}