blob: 33b4fcf6e489f0d5d20e3da894399c087ad72dd9 [file] [log] [blame]
Ingo Molnare0143ba2009-03-23 21:29:59 +01001/*
2 * kerneltop.c: show top kernel functions - performance counters showcase
3
4 Build with:
5
 cc -O6 -Wall -o kerneltop kerneltop.c -lrt -lpthread
Ingo Molnare0143ba2009-03-23 21:29:59 +01007
8 Sample output:
9
10------------------------------------------------------------------------------
11 KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2)
12------------------------------------------------------------------------------
13
14 weight RIP kernel function
15 ______ ________________ _______________
16
17 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
18 33.00 - ffffffff804cb740 : sock_alloc_send_skb
19 31.26 - ffffffff804ce808 : skb_push
20 22.43 - ffffffff80510004 : tcp_established_options
21 19.00 - ffffffff8027d250 : find_get_page
22 15.76 - ffffffff804e4fc9 : eth_type_trans
23 15.20 - ffffffff804d8baa : dst_release
24 14.86 - ffffffff804cf5d8 : skb_release_head_state
25 14.00 - ffffffff802217d5 : read_hpet
26 12.00 - ffffffff804ffb7f : __ip_local_out
27 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
28 8.54 - ffffffff805001a3 : ip_queue_xmit
Ingo Molnare0143ba2009-03-23 21:29:59 +010029 */
Wu Fengguangf7524bd2009-03-20 10:08:06 +080030
31/*
32 * perfstat: /usr/bin/time -alike performance counter statistics utility
33
34 It summarizes the counter events of all tasks (and child tasks),
35 covering all CPUs that the command (or workload) executes on.
36 It only counts the per-task events of the workload started,
37 independent of how many other tasks run on those CPUs.
38
39 Sample output:
40
41 $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
42
43 Performance counter stats for 'ls':
44
45 163516953 instructions
46 2295 cache-misses
47 2855182 branch-misses
48 */
49
50 /*
51 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
52 *
53 * Improvements and fixes by:
54 *
55 * Arjan van de Ven <arjan@linux.intel.com>
56 * Yanmin Zhang <yanmin.zhang@intel.com>
57 * Wu Fengguang <fengguang.wu@intel.com>
58 * Mike Galbraith <efault@gmx.de>
Paul Mackerrascbe46552009-03-24 16:52:34 +110059 * Paul Mackerras <paulus@samba.org>
Wu Fengguangf7524bd2009-03-20 10:08:06 +080060 *
61 * Released under the GPL v2. (and only v2, not any later version)
62 */
63
Ingo Molnare0143ba2009-03-23 21:29:59 +010064#define _GNU_SOURCE
65#include <sys/types.h>
66#include <sys/stat.h>
67#include <sys/time.h>
68#include <unistd.h>
69#include <stdint.h>
70#include <stdlib.h>
71#include <string.h>
Paul Mackerrascbe46552009-03-24 16:52:34 +110072#include <limits.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010073#include <getopt.h>
74#include <assert.h>
75#include <fcntl.h>
76#include <stdio.h>
77#include <errno.h>
78#include <ctype.h>
79#include <time.h>
Mike Galbraith9dd49982009-03-27 12:13:43 +010080#include <sched.h>
81#include <pthread.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010082
Ingo Molnare0143ba2009-03-23 21:29:59 +010083#include <sys/syscall.h>
84#include <sys/ioctl.h>
85#include <sys/poll.h>
86#include <sys/prctl.h>
87#include <sys/wait.h>
88#include <sys/uio.h>
Peter Zijlstrabcbcb372009-03-23 18:22:12 +010089#include <sys/mman.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010090
91#include <linux/unistd.h>
Paul Mackerrascbe46552009-03-24 16:52:34 +110092#include <linux/types.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010093
Ingo Molnar383c5f82009-03-23 21:49:25 +010094#include "../../include/linux/perf_counter.h"
Ingo Molnare0143ba2009-03-23 21:29:59 +010095
Wu Fengguangf7524bd2009-03-20 10:08:06 +080096
/*
 * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
 * counters in the current task.
 */
#define PR_TASK_PERF_COUNTERS_DISABLE	31
#define PR_TASK_PERF_COUNTERS_ENABLE	32

/* Element count of a real array; not meaningful when x is a pointer. */
#define ARRAY_SIZE(x)	(sizeof(x) / sizeof((x)[0]))

/*
 * Read CLOCK_MONOTONIC and evaluate to tv_sec * 10^9 + tv_nsec, i.e. the
 * timestamp in nanoseconds. This is a GNU statement expression; the return
 * value of clock_gettime() is not checked.
 */
#define rdclock()					\
({							\
	struct timespec ts;				\
							\
	clock_gettime(CLOCK_MONOTONIC, &ts);		\
	ts.tv_sec * 1000000000ULL + ts.tv_nsec;		\
})

/*
 * Pick up some kernel type conventions:
 * both annotations expand to nothing in this user-space build.
 */
#define __user
#define asmlinkage
119
/*
 * Per-architecture glue: the perf_counter_open syscall number, a read
 * memory barrier (rmb) and a busy-wait hint (cpu_relax).
 *
 * Each macro expands to a single statement WITHOUT a trailing semicolon, so
 * that "if (x) cpu_relax(); else ..." parses the way it reads; callers
 * supply the semicolon themselves. __asm__/__volatile__ are used instead of
 * the plain "asm" keyword so the file also builds in strict ISO modes.
 */
#ifdef __x86_64__
#define __NR_perf_counter_open	295
#define rmb()		__asm__ __volatile__("lfence" ::: "memory")
#define cpu_relax()	__asm__ __volatile__("rep; nop" ::: "memory")
#endif

#ifdef __i386__
#define __NR_perf_counter_open	333
#define rmb()		__asm__ __volatile__("lfence" ::: "memory")
#define cpu_relax()	__asm__ __volatile__("rep; nop" ::: "memory")
#endif

#ifdef __powerpc__
#define __NR_perf_counter_open	319
#define rmb()		__asm__ __volatile__("sync" ::: "memory")
#define cpu_relax()	__asm__ __volatile__("" ::: "memory")
#endif
137
/* Branch hint: tell the compiler the condition x is expected to be false. */
#define unlikely(x)	__builtin_expect(!!(x), 0)

/*
 * Minimum of x and y. Each argument is evaluated exactly once (copied into
 * a typeof() temporary). The otherwise useless pointer comparison makes the
 * compiler diagnose calls where x and y have different types.
 */
#define min(x, y) ({				\
	typeof(x) _min1 = (x);			\
	typeof(y) _min2 = (y);			\
	(void) (&_min1 == &_min2);		\
	_min1 < _min2 ? _min1 : _min2; })
Peter Zijlstrabcbcb372009-03-23 18:22:12 +0100144
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100145asmlinkage int sys_perf_counter_open(
146 struct perf_counter_hw_event *hw_event_uptr __user,
147 pid_t pid,
148 int cpu,
149 int group_fd,
150 unsigned long flags)
151{
Paul Mackerrascbe46552009-03-24 16:52:34 +1100152 return syscall(
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100153 __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100154}
155
#define MAX_COUNTERS			64	/* capacity of the per-counter arrays below */
#define MAX_NR_CPUS			256	/* first dimension of fd[][] */

/* Build a counter config word: type shifted up by PERF_COUNTER_TYPE_SHIFT, OR'ed with id. */
#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))

static int			run_perfstat	=  0;	/* non-zero: display_help() prints the perfstat usage */
static int			system_wide	=  0;	/* non-zero: perfstat opens one counter per CPU */

/* Number of entries of event_id[] in use; advanced by parse_events(). */
static int			nr_counters	=  0;
/*
 * Counter configs, indexed by counter number. The initializer supplies
 * eight default events; parse_events() stores user-selected events starting
 * at index nr_counters.
 */
static __u64			event_id[MAX_COUNTERS]		= {
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),

	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
	EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
};
/* NOTE(review): not used in the visible part of the file - presumably the fallback sample period. */
static int			default_interval = 100000;
/* Per-counter value; entry 0 is printed in the table header when only one counter is active. */
static int			event_count[MAX_COUNTERS];
/* Counter file descriptors returned by sys_perf_counter_open(), indexed [cpu][counter]. */
static int			fd[MAX_NR_CPUS][MAX_COUNTERS];

/* Minimum count[0] a symbol needs in order to be listed by print_sym_table(). */
static __u64			count_filter		       = 100;

/*
 * The following are consumed by code in this file but assigned elsewhere
 * (option parsing is not visible here); the help text in display_help()
 * names the matching command-line switches.
 */
static int			tid				= -1;	/* shown as "tid:" in the table header unless -1 */
static int			profile_cpu			= -1;	/* shown as "cpu:" in the table header unless -1 */
static int			nr_cpus				=  0;	/* CPUs iterated over; forced to 1 by do_perfstat() when not system-wide */
static int			nmi				=  1;	/* selects "NMI" vs "IRQ" in the table header */
static unsigned int		realtime_prio			=  0;	/* presumably -r; TODO confirm against option parsing */
static int			group				=  0;
static unsigned int		page_size;
static unsigned int		mmap_pages			=  16;	/* presumably -m; TODO confirm */
static int			use_mmap			= 0;	/* presumably -M; TODO confirm */
static int			use_munmap			= 0;	/* presumably -U; TODO confirm */

static char			*vmlinux;

/* Name of the symbol to annotate; cleared by read_symbol() if the symbol is too large. */
static char			*sym_filter;
/* Address range of the filter symbol, recorded by read_symbol(). */
static unsigned long		filter_start;
static unsigned long		filter_end;

/* Seconds between two table refreshes (display_thread) and divisor for the event rates. */
static int			delay_secs			=  2;
/* Non-zero: print_sym_table() zeroes the counts instead of decaying them by 7/8. */
static int			zero;
/* Non-zero: parse_symbols() dumps the symbol table to stderr. */
static int			dump_symtab;

/* Non-zero: perfstat also reads enabled/running times and scales the counts. */
static int			scale;
204
/*
 * One line of "objdump -dS" output for the filtered symbol, kept in a
 * singly linked list in output order (built by parse_vmlinux()).
 */
struct source_line {
	uint64_t		EIP;	/* address parsed from the line, 0 if the line carries none */
	unsigned long		count;	/* hits recorded by record_precise_ip() */
	char			*line;	/* line text from getline(), newline stripped */
	struct source_line	*next;	/* following line, NULL at the end of the list */
};

/* Head of the source-line list. */
static struct source_line	*lines;
/* Address of the pointer where the next node is appended (tail insertion). */
static struct source_line	**lines_tail;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800214
/*
 * NOTE(review): not referenced in the visible part of the file - presumably
 * per-event default sample periods; confirm against the option parser.
 */
const unsigned int default_count[] = {
	1000000,
	1000000,
	  10000,
	  10000,
	1000000,
	  10000,
};

/*
 * Display names for PERF_TYPE_HARDWARE events; event_name() indexes this
 * table directly with the event id, so the order must follow the
 * hardware event numbering.
 */
static char *hw_event_names[] = {
	"CPU cycles",
	"instructions",
	"cache references",
	"cache misses",
	"branches",
	"branch misses",
	"bus cycles",
};

/*
 * Display names for PERF_TYPE_SOFTWARE events; indexed by event id in
 * event_name(), same ordering requirement as above.
 */
static char *sw_event_names[] = {
	"cpu clock ticks",
	"task clock ticks",
	"pagefaults",
	"context switches",
	"CPU migrations",
	"minor faults",
	"major faults",
};
243
/* Maps one symbolic command-line name onto a counter config word. */
struct event_symbol {
	__u64 event;	/* config word built with EID() */
	char *symbol;	/* name accepted by match_event_symbols() */
};

/*
 * Symbolic event names. An event may appear several times under different
 * aliases. match_event_symbols() scans this table front to back and takes
 * the first entry whose symbol is a prefix of the input, so the order of
 * entries matters.
 */
static struct event_symbol event_symbols[] = {
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cpu-cycles",		},
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cycles",		},
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),		"instructions",		},
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),		"cache-references",	},
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),		"cache-misses",		},
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branch-instructions",	},
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branches",		},
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES),		"branch-misses",	},
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES),		"bus-cycles",		},

	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK),			"cpu-clock",		},
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),		"task-clock",		},
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"page-faults",		},
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"faults",		},
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN),		"minor-faults",		},
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ),		"major-faults",		},
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"context-switches",	},
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"cs",			},
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"cpu-migrations",	},
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"migrations",		},
};
271
/*
 * Extract the bit field <name> from a counter config word: mask with
 * PERF_COUNTER_<name>_MASK, then shift down by PERF_COUNTER_<name>_SHIFT.
 *
 * The argument is parenthesized so that compound expressions such as
 * "a | b" are masked as a whole rather than having only their right-hand
 * operand masked ('&' binds tighter than '|').
 */
#define __PERF_COUNTER_FIELD(config, name) \
	(((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
279
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800280static void display_events_help(void)
281{
282 unsigned int i;
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100283 __u64 e;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800284
285 printf(
286 " -e EVENT --event=EVENT # symbolic-name abbreviations");
287
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100288 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
289 int type, id;
290
291 e = event_symbols[i].event;
292 type = PERF_COUNTER_TYPE(e);
293 id = PERF_COUNTER_ID(e);
294
295 printf("\n %d:%d: %-20s",
296 type, id, event_symbols[i].symbol);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800297 }
298
299 printf("\n"
300 " rNNN: raw PMU events (eventsel+umask)\n\n");
301}
302
303static void display_perfstat_help(void)
304{
305 printf(
306 "Usage: perfstat [<events...>] <cmd...>\n\n"
307 "PerfStat Options (up to %d event types can be specified):\n\n",
308 MAX_COUNTERS);
309
310 display_events_help();
311
312 printf(
Paul Mackerras31f004d2009-03-30 19:07:10 +0200313 " -l # scale counter values\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800314 " -a # system-wide collection\n");
315 exit(0);
316}
Ingo Molnare0143ba2009-03-23 21:29:59 +0100317
318static void display_help(void)
319{
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800320 if (run_perfstat)
321 return display_perfstat_help();
322
Ingo Molnare0143ba2009-03-23 21:29:59 +0100323 printf(
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800324 "Usage: kerneltop [<options>]\n"
325 " Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100326 "KernelTop Options (up to %d event types can be specified at once):\n\n",
327 MAX_COUNTERS);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800328
329 display_events_help();
330
Ingo Molnare0143ba2009-03-23 21:29:59 +0100331 printf(
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800332 " -S --stat # perfstat COMMAND\n"
333 " -a # system-wide collection (for perfstat)\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100334 " -c CNT --count=CNT # event period to sample\n\n"
335 " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n"
336 " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n"
Paul Mackerras31f004d2009-03-30 19:07:10 +0200337 " -l # show scale factor for RR events\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100338 " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800339 " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n"
Mike Galbraith9dd49982009-03-27 12:13:43 +0100340 " -r prio --realtime=<prio> # event acquisition runs with SCHED_FIFO policy\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100341 " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800342 " -x path --vmlinux=<path> # the vmlinux binary, required for -s use\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100343 " -z --zero # zero counts after display\n"
344 " -D --dump_symtab # dump symbol table to stderr on startup\n"
Peter Zijlstra4c4ba212009-03-25 12:30:26 +0100345 " -m pages --mmap_pages=<pages> # number of mmap data pages\n"
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +0200346 " -M --mmap_info # print mmap info stream\n"
347 " -U --munmap_info # print munmap info stream\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800348 );
Ingo Molnare0143ba2009-03-23 21:29:59 +0100349
350 exit(0);
351}
352
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800353static char *event_name(int ctr)
354{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100355 __u64 config = event_id[ctr];
356 int type = PERF_COUNTER_TYPE(config);
357 int id = PERF_COUNTER_ID(config);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800358 static char buf[32];
359
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100360 if (PERF_COUNTER_RAW(config)) {
361 sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800362 return buf;
363 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800364
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100365 switch (type) {
366 case PERF_TYPE_HARDWARE:
367 if (id < PERF_HW_EVENTS_MAX)
368 return hw_event_names[id];
369 return "unknown-hardware";
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800370
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100371 case PERF_TYPE_SOFTWARE:
372 if (id < PERF_SW_EVENTS_MAX)
373 return sw_event_names[id];
374 return "unknown-software";
375
376 default:
377 break;
378 }
379
380 return "unknown";
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800381}
382
383/*
384 * Each event can have multiple symbolic names.
385 * Symbolic names are (almost) exactly matched.
386 */
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100387static __u64 match_event_symbols(char *str)
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800388{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100389 __u64 config, id;
390 int type;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800391 unsigned int i;
392
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100393 if (sscanf(str, "r%llx", &config) == 1)
394 return config | PERF_COUNTER_RAW_MASK;
395
396 if (sscanf(str, "%d:%llu", &type, &id) == 2)
397 return EID(type, id);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800398
399 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
400 if (!strncmp(str, event_symbols[i].symbol,
401 strlen(event_symbols[i].symbol)))
402 return event_symbols[i].event;
403 }
404
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100405 return ~0ULL;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800406}
407
408static int parse_events(char *str)
409{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100410 __u64 config;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800411
412again:
413 if (nr_counters == MAX_COUNTERS)
414 return -1;
415
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100416 config = match_event_symbols(str);
417 if (config == ~0ULL)
418 return -1;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800419
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100420 event_id[nr_counters] = config;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800421 nr_counters++;
422
423 str = strstr(str, ",");
424 if (str) {
425 str++;
426 goto again;
427 }
428
429 return 0;
430}
431
432
433/*
434 * perfstat
435 */
436
/*
 * NOTE(review): not referenced in the visible part of the file; the name
 * suggests a scratch area touched to provoke page faults - confirm before
 * removing.
 */
char fault_here[1000000];
438
439static void create_perfstat_counter(int counter)
440{
441 struct perf_counter_hw_event hw_event;
442
443 memset(&hw_event, 0, sizeof(hw_event));
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100444 hw_event.config = event_id[counter];
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800445 hw_event.record_type = PERF_RECORD_SIMPLE;
446 hw_event.nmi = 0;
Paul Mackerras31f004d2009-03-30 19:07:10 +0200447 if (scale)
448 hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
449 PERF_FORMAT_TOTAL_TIME_RUNNING;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800450
451 if (system_wide) {
452 int cpu;
453 for (cpu = 0; cpu < nr_cpus; cpu ++) {
454 fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
455 if (fd[cpu][counter] < 0) {
456 printf("perfstat error: syscall returned with %d (%s)\n",
457 fd[cpu][counter], strerror(errno));
458 exit(-1);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100459 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800460 }
461 } else {
462 hw_event.inherit = 1;
463 hw_event.disabled = 1;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100464
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800465 fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
466 if (fd[0][counter] < 0) {
467 printf("perfstat error: syscall returned with %d (%s)\n",
468 fd[0][counter], strerror(errno));
469 exit(-1);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100470 }
471 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800472}
Ingo Molnare0143ba2009-03-23 21:29:59 +0100473
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800474int do_perfstat(int argc, char *argv[])
475{
476 unsigned long long t0, t1;
477 int counter;
478 ssize_t res;
479 int status;
480 int pid;
481
482 if (!system_wide)
483 nr_cpus = 1;
484
485 for (counter = 0; counter < nr_counters; counter++)
486 create_perfstat_counter(counter);
487
488 argc -= optind;
489 argv += optind;
490
Wu Fengguangaf9522c2009-03-20 10:08:10 +0800491 if (!argc)
492 display_help();
493
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800494 /*
495 * Enable counters and exec the command:
496 */
497 t0 = rdclock();
498 prctl(PR_TASK_PERF_COUNTERS_ENABLE);
499
500 if ((pid = fork()) < 0)
501 perror("failed to fork");
502 if (!pid) {
503 if (execvp(argv[0], argv)) {
504 perror(argv[0]);
505 exit(-1);
506 }
Wu Fengguang95bb3be2009-03-20 10:08:04 +0800507 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800508 while (wait(&status) >= 0)
509 ;
510 prctl(PR_TASK_PERF_COUNTERS_DISABLE);
511 t1 = rdclock();
512
513 fflush(stdout);
514
515 fprintf(stderr, "\n");
516 fprintf(stderr, " Performance counter stats for \'%s\':\n",
517 argv[0]);
518 fprintf(stderr, "\n");
Ingo Molnare0143ba2009-03-23 21:29:59 +0100519
520 for (counter = 0; counter < nr_counters; counter++) {
Paul Mackerras31f004d2009-03-30 19:07:10 +0200521 int cpu, nv;
522 __u64 count[3], single_count[3];
523 int scaled;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100524
Paul Mackerras31f004d2009-03-30 19:07:10 +0200525 count[0] = count[1] = count[2] = 0;
526 nv = scale ? 3 : 1;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800527 for (cpu = 0; cpu < nr_cpus; cpu ++) {
528 res = read(fd[cpu][counter],
Paul Mackerras31f004d2009-03-30 19:07:10 +0200529 single_count, nv * sizeof(__u64));
530 assert(res == nv * sizeof(__u64));
531
532 count[0] += single_count[0];
533 if (scale) {
534 count[1] += single_count[1];
535 count[2] += single_count[2];
536 }
537 }
538
539 scaled = 0;
540 if (scale) {
541 if (count[2] == 0) {
542 fprintf(stderr, " %14s %-20s\n",
543 "<not counted>", event_name(counter));
544 continue;
545 }
546 if (count[2] < count[1]) {
547 scaled = 1;
548 count[0] = (unsigned long long)
549 ((double)count[0] * count[1] / count[2] + 0.5);
550 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800551 }
552
Paul Mackerrascbe46552009-03-24 16:52:34 +1100553 if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
554 event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800555
Paul Mackerras31f004d2009-03-30 19:07:10 +0200556 double msecs = (double)count[0] / 1000000;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800557
Paul Mackerras31f004d2009-03-30 19:07:10 +0200558 fprintf(stderr, " %14.6f %-20s (msecs)",
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800559 msecs, event_name(counter));
560 } else {
Paul Mackerras31f004d2009-03-30 19:07:10 +0200561 fprintf(stderr, " %14Ld %-20s (events)",
562 count[0], event_name(counter));
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800563 }
Paul Mackerras31f004d2009-03-30 19:07:10 +0200564 if (scaled)
565 fprintf(stderr, " (scaled from %.2f%%)",
566 (double) count[2] / count[1] * 100);
567 fprintf(stderr, "\n");
Ingo Molnare0143ba2009-03-23 21:29:59 +0100568 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800569 fprintf(stderr, "\n");
570 fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
571 (double)(t1-t0)/1e6);
572 fprintf(stderr, "\n");
573
574 return 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100575}
576
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800577/*
578 * Symbols
579 */
580
/* Lowest / highest symbol address; set by parse_symbols() after sorting. */
static uint64_t			min_ip;
static uint64_t			max_ip = -1ll;

/* One kernel text symbol read from /proc/kallsyms. */
struct sym_entry {
	unsigned long long	addr;			/* start address of the symbol */
	char			*sym;			/* heap copy of the name (read_symbol) */
	unsigned long		count[MAX_COUNTERS];	/* per-counter hit counts, decayed or zeroed on every refresh */
	int			skip;			/* set for the idle-loop functions tagged in read_symbol() */
	struct source_line	*source;		/* first matching objdump line, filled lazily by show_details() */
};

/* Capacity of sym_table[] and tmp[]. */
#define MAX_SYMS		100000

/* Number of entries of sym_table[] in use. */
static int sym_table_count;

/* Entry of the symbol selected by sym_filter; NULL if none was found. */
struct sym_entry		*sym_filter_entry;

/* Symbol table, sorted by address once parse_symbols() has run. */
static struct sym_entry		sym_table[MAX_SYMS];

static void show_details(struct sym_entry *sym);
601
602/*
Wu Fengguangef45fa92009-03-20 10:08:07 +0800603 * Ordering weight: count-1 * count-2 * ... / count-n
Ingo Molnare0143ba2009-03-23 21:29:59 +0100604 */
605static double sym_weight(const struct sym_entry *sym)
606{
607 double weight;
608 int counter;
609
610 weight = sym->count[0];
611
612 for (counter = 1; counter < nr_counters-1; counter++)
613 weight *= sym->count[counter];
614
615 weight /= (sym->count[counter] + 1);
616
617 return weight;
618}
619
/*
 * qsort() comparator ordering symbols by descending sym_weight().
 *
 * Returns a negative value when sym1 is heavier (sorts first), a positive
 * value when sym2 is heavier, and 0 for equal weights. qsort() requires
 * this three-way, antisymmetric result; a plain "w1 < w2" never yields a
 * negative value and gives inconsistent answers when the arguments are
 * swapped.
 */
static int compare(const void *__sym1, const void *__sym2)
{
	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
	double w1 = sym_weight(sym1);
	double w2 = sym_weight(sym2);

	if (w1 < w2)
		return 1;
	if (w1 > w2)
		return -1;

	return 0;
}
626
/*
 * Event tallies for the current refresh interval; print_sym_table() turns
 * them into per-second rates and then resets both to 0. They are
 * incremented outside the visible part of the file.
 */
static long			events;
static long			userspace_events;
/*
 * ANSI escape sequence written to stdout before each table refresh:
 * cursor home (ESC [ H) followed by erase display (ESC [ 2 J). Spelled with
 * octal escapes so the control bytes survive editors and code viewers.
 */
static const char CONSOLE_CLEAR[] = "\033[H\033[2J";
630
/* Scratch copy of sym_table[] that print_sym_table() sorts by weight. */
static struct sym_entry		tmp[MAX_SYMS];
632
633static void print_sym_table(void)
634{
635 int i, printed;
636 int counter;
637 float events_per_sec = events/delay_secs;
638 float kevents_per_sec = (events-userspace_events)/delay_secs;
639
Mike Galbraith9dd49982009-03-27 12:13:43 +0100640 events = userspace_events = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100641 memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count);
642 qsort(tmp, sym_table_count, sizeof(tmp[0]), compare);
643
644 write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
645
646 printf(
647"------------------------------------------------------------------------------\n");
648 printf( " KernelTop:%8.0f irqs/sec kernel:%3.1f%% [%s, ",
649 events_per_sec,
650 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)),
651 nmi ? "NMI" : "IRQ");
652
653 if (nr_counters == 1)
654 printf("%d ", event_count[0]);
655
656 for (counter = 0; counter < nr_counters; counter++) {
657 if (counter)
658 printf("/");
659
Wu Fengguange3908612009-03-20 10:08:05 +0800660 printf("%s", event_name(counter));
Ingo Molnare0143ba2009-03-23 21:29:59 +0100661 }
662
663 printf( "], ");
664
665 if (tid != -1)
666 printf(" (tid: %d", tid);
667 else
668 printf(" (all");
669
670 if (profile_cpu != -1)
671 printf(", cpu: %d)\n", profile_cpu);
672 else {
673 if (tid != -1)
674 printf(")\n");
675 else
676 printf(", %d CPUs)\n", nr_cpus);
677 }
678
679 printf("------------------------------------------------------------------------------\n\n");
680
681 if (nr_counters == 1)
682 printf(" events");
683 else
684 printf(" weight events");
685
686 printf(" RIP kernel function\n"
687 " ______ ______ ________________ _______________\n\n"
688 );
689
690 printed = 0;
691 for (i = 0; i < sym_table_count; i++) {
692 int count;
693
694 if (nr_counters == 1) {
695 if (printed <= 18 &&
696 tmp[i].count[0] >= count_filter) {
697 printf("%19.2f - %016llx : %s\n",
698 sym_weight(tmp + i), tmp[i].addr, tmp[i].sym);
699 printed++;
700 }
701 } else {
702 if (printed <= 18 &&
703 tmp[i].count[0] >= count_filter) {
704 printf("%8.1f %10ld - %016llx : %s\n",
705 sym_weight(tmp + i),
706 tmp[i].count[0],
707 tmp[i].addr, tmp[i].sym);
708 printed++;
709 }
710 }
711 /*
712 * Add decay to the counts:
713 */
714 for (count = 0; count < nr_counters; count++)
715 sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8;
716 }
717
718 if (sym_filter_entry)
719 show_details(sym_filter_entry);
720
Ingo Molnare0143ba2009-03-23 21:29:59 +0100721 {
722 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
723
724 if (poll(&stdin_poll, 1, 0) == 1) {
725 printf("key pressed - exiting.\n");
726 exit(0);
727 }
728 }
729}
730
Mike Galbraith9dd49982009-03-27 12:13:43 +0100731static void *display_thread(void *arg)
732{
733 printf("KernelTop refresh period: %d seconds\n", delay_secs);
734
735 while (!sleep(delay_secs))
736 print_sym_table();
737
738 return NULL;
739}
740
Ingo Molnare0143ba2009-03-23 21:29:59 +0100741static int read_symbol(FILE *in, struct sym_entry *s)
742{
743 static int filter_match = 0;
744 char *sym, stype;
745 char str[500];
746 int rc, pos;
747
748 rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str);
749 if (rc == EOF)
750 return -1;
751
752 assert(rc == 3);
753
754 /* skip until end of line: */
755 pos = strlen(str);
756 do {
757 rc = fgetc(in);
758 if (rc == '\n' || rc == EOF || pos >= 499)
759 break;
760 str[pos] = rc;
761 pos++;
762 } while (1);
763 str[pos] = 0;
764
765 sym = str;
766
767 /* Filter out known duplicates and non-text symbols. */
768 if (!strcmp(sym, "_text"))
769 return 1;
770 if (!min_ip && !strcmp(sym, "_stext"))
771 return 1;
772 if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext"))
773 return 1;
774 if (stype != 'T' && stype != 't')
775 return 1;
776 if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14))
777 return 1;
778 if (strstr(sym, "_text_start") || strstr(sym, "_text_end"))
779 return 1;
780
781 s->sym = malloc(strlen(str));
782 assert(s->sym);
783
784 strcpy((char *)s->sym, str);
785 s->skip = 0;
786
787 /* Tag events to be skipped. */
788 if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
789 s->skip = 1;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +0100790 else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
791 s->skip = 1;
792 else if (!strcmp("mwait_idle", s->sym))
Ingo Molnare0143ba2009-03-23 21:29:59 +0100793 s->skip = 1;
794
795 if (filter_match == 1) {
796 filter_end = s->addr;
797 filter_match = -1;
798 if (filter_end - filter_start > 10000) {
799 printf("hm, too large filter symbol <%s> - skipping.\n",
800 sym_filter);
801 printf("symbol filter start: %016lx\n", filter_start);
802 printf(" end: %016lx\n", filter_end);
803 filter_end = filter_start = 0;
804 sym_filter = NULL;
805 sleep(1);
806 }
807 }
808 if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) {
809 filter_match = 1;
810 filter_start = s->addr;
811 }
812
813 return 0;
814}
815
816int compare_addr(const void *__sym1, const void *__sym2)
817{
818 const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
819
820 return sym1->addr > sym2->addr;
821}
822
823static void sort_symbol_table(void)
824{
825 int i, dups;
826
827 do {
828 qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr);
829 for (i = 0, dups = 0; i < sym_table_count; i++) {
830 if (sym_table[i].addr == sym_table[i+1].addr) {
831 sym_table[i+1].addr = -1ll;
832 dups++;
833 }
834 }
835 sym_table_count -= dups;
836 } while(dups);
837}
838
839static void parse_symbols(void)
840{
841 struct sym_entry *last;
842
843 FILE *kallsyms = fopen("/proc/kallsyms", "r");
844
845 if (!kallsyms) {
846 printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n");
847 exit(-1);
848 }
849
850 while (!feof(kallsyms)) {
851 if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) {
852 sym_table_count++;
853 assert(sym_table_count <= MAX_SYMS);
854 }
855 }
856
857 sort_symbol_table();
858 min_ip = sym_table[0].addr;
859 max_ip = sym_table[sym_table_count-1].addr;
860 last = sym_table + sym_table_count++;
861
862 last->addr = -1ll;
863 last->sym = "<end>";
864
865 if (filter_end) {
866 int count;
867 for (count=0; count < sym_table_count; count ++) {
868 if (!strcmp(sym_table[count].sym, sym_filter)) {
869 sym_filter_entry = &sym_table[count];
870 break;
871 }
872 }
873 }
874 if (dump_symtab) {
875 int i;
876
877 for (i = 0; i < sym_table_count; i++)
878 fprintf(stderr, "%llx %s\n",
879 sym_table[i].addr, sym_table[i].sym);
880 }
881}
882
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800883/*
884 * Source lines
885 */
Ingo Molnare0143ba2009-03-23 21:29:59 +0100886
887static void parse_vmlinux(char *filename)
888{
889 FILE *file;
890 char command[PATH_MAX*2];
891 if (!filename)
892 return;
893
894 sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename);
895
896 file = popen(command, "r");
897 if (!file)
898 return;
899
Paul Mackerrascbe46552009-03-24 16:52:34 +1100900 lines_tail = &lines;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100901 while (!feof(file)) {
902 struct source_line *src;
903 size_t dummy = 0;
904 char *c;
905
906 src = malloc(sizeof(struct source_line));
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800907 assert(src != NULL);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100908 memset(src, 0, sizeof(struct source_line));
909
910 if (getline(&src->line, &dummy, file) < 0)
911 break;
912 if (!src->line)
913 break;
914
915 c = strchr(src->line, '\n');
916 if (c)
917 *c = 0;
918
Paul Mackerrascbe46552009-03-24 16:52:34 +1100919 src->next = NULL;
920 *lines_tail = src;
921 lines_tail = &src->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100922
923 if (strlen(src->line)>8 && src->line[8] == ':')
924 src->EIP = strtoull(src->line, NULL, 16);
925 if (strlen(src->line)>8 && src->line[16] == ':')
926 src->EIP = strtoull(src->line, NULL, 16);
927 }
928 pclose(file);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100929}
930
931static void record_precise_ip(uint64_t ip)
932{
933 struct source_line *line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100934
Paul Mackerrascbe46552009-03-24 16:52:34 +1100935 for (line = lines; line; line = line->next) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100936 if (line->EIP == ip)
937 line->count++;
938 if (line->EIP > ip)
939 break;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100940 }
941}
942
943static void lookup_sym_in_vmlinux(struct sym_entry *sym)
944{
945 struct source_line *line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100946 char pattern[PATH_MAX];
947 sprintf(pattern, "<%s>:", sym->sym);
948
Paul Mackerrascbe46552009-03-24 16:52:34 +1100949 for (line = lines; line; line = line->next) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100950 if (strstr(line->line, pattern)) {
Paul Mackerrascbe46552009-03-24 16:52:34 +1100951 sym->source = line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100952 break;
953 }
Ingo Molnare0143ba2009-03-23 21:29:59 +0100954 }
955}
956
Paul Mackerrascbe46552009-03-24 16:52:34 +1100957static void show_lines(struct source_line *line_queue, int line_queue_count)
Ingo Molnare0143ba2009-03-23 21:29:59 +0100958{
959 int i;
960 struct source_line *line;
961
Paul Mackerrascbe46552009-03-24 16:52:34 +1100962 line = line_queue;
963 for (i = 0; i < line_queue_count; i++) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100964 printf("%8li\t%s\n", line->count, line->line);
Paul Mackerrascbe46552009-03-24 16:52:34 +1100965 line = line->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100966 }
967}
968
969#define TRACE_COUNT 3
970
971static void show_details(struct sym_entry *sym)
972{
973 struct source_line *line;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100974 struct source_line *line_queue = NULL;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100975 int displayed = 0;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100976 int line_queue_count = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100977
978 if (!sym->source)
979 lookup_sym_in_vmlinux(sym);
980 if (!sym->source)
981 return;
982
983 printf("Showing details for %s\n", sym->sym);
984
Paul Mackerrascbe46552009-03-24 16:52:34 +1100985 line = sym->source;
986 while (line) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100987 if (displayed && strstr(line->line, ">:"))
988 break;
989
Paul Mackerrascbe46552009-03-24 16:52:34 +1100990 if (!line_queue_count)
991 line_queue = line;
992 line_queue_count ++;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100993
994 if (line->count >= count_filter) {
Paul Mackerrascbe46552009-03-24 16:52:34 +1100995 show_lines(line_queue, line_queue_count);
996 line_queue_count = 0;
997 line_queue = NULL;
998 } else if (line_queue_count > TRACE_COUNT) {
999 line_queue = line_queue->next;
1000 line_queue_count --;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001001 }
1002
1003 line->count = 0;
1004 displayed++;
1005 if (displayed > 300)
1006 break;
Paul Mackerrascbe46552009-03-24 16:52:34 +11001007 line = line->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001008 }
1009}
1010
1011/*
1012 * Binary search in the histogram table and record the hit:
1013 */
1014static void record_ip(uint64_t ip, int counter)
1015{
1016 int left_idx, middle_idx, right_idx, idx;
1017 unsigned long left, middle, right;
1018
1019 record_precise_ip(ip);
1020
1021 left_idx = 0;
1022 right_idx = sym_table_count-1;
1023 assert(ip <= max_ip && ip >= min_ip);
1024
1025 while (left_idx + 1 < right_idx) {
1026 middle_idx = (left_idx + right_idx) / 2;
1027
1028 left = sym_table[ left_idx].addr;
1029 middle = sym_table[middle_idx].addr;
1030 right = sym_table[ right_idx].addr;
1031
1032 if (!(left <= middle && middle <= right)) {
1033 printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right);
1034 printf("%d %d %d\n", left_idx, middle_idx, right_idx);
1035 }
1036 assert(left <= middle && middle <= right);
1037 if (!(left <= ip && ip <= right)) {
1038 printf(" left: %016lx\n", left);
Ingo Molnar193e8df2009-03-23 22:23:16 +01001039 printf(" ip: %016lx\n", (unsigned long)ip);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001040 printf("right: %016lx\n", right);
1041 }
1042 assert(left <= ip && ip <= right);
1043 /*
1044 * [ left .... target .... middle .... right ]
1045 * => right := middle
1046 */
1047 if (ip < middle) {
1048 right_idx = middle_idx;
1049 continue;
1050 }
1051 /*
1052 * [ left .... middle ... target ... right ]
1053 * => left := middle
1054 */
1055 left_idx = middle_idx;
1056 }
1057
1058 idx = left_idx;
1059
1060 if (!sym_table[idx].skip)
1061 sym_table[idx].count[counter]++;
1062 else events--;
1063}
1064
1065static void process_event(uint64_t ip, int counter)
1066{
1067 events++;
1068
1069 if (ip < min_ip || ip > max_ip) {
1070 userspace_events++;
1071 return;
1072 }
1073
1074 record_ip(ip, counter);
1075}
1076
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001077static void process_options(int argc, char *argv[])
1078{
1079 int error = 0, counter;
1080
1081 if (strstr(argv[0], "perfstat"))
1082 run_perfstat = 1;
1083
1084 for (;;) {
1085 int option_index = 0;
1086 /** Options for getopt */
1087 static struct option long_options[] = {
1088 {"count", required_argument, NULL, 'c'},
1089 {"cpu", required_argument, NULL, 'C'},
1090 {"delay", required_argument, NULL, 'd'},
1091 {"dump_symtab", no_argument, NULL, 'D'},
1092 {"event", required_argument, NULL, 'e'},
1093 {"filter", required_argument, NULL, 'f'},
1094 {"group", required_argument, NULL, 'g'},
1095 {"help", no_argument, NULL, 'h'},
1096 {"nmi", required_argument, NULL, 'n'},
Mike Galbraith9dd49982009-03-27 12:13:43 +01001097 {"mmap_info", no_argument, NULL, 'M'},
1098 {"mmap_pages", required_argument, NULL, 'm'},
1099 {"munmap_info", no_argument, NULL, 'U'},
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001100 {"pid", required_argument, NULL, 'p'},
Mike Galbraith9dd49982009-03-27 12:13:43 +01001101 {"realtime", required_argument, NULL, 'r'},
1102 {"scale", no_argument, NULL, 'l'},
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001103 {"symbol", required_argument, NULL, 's'},
1104 {"stat", no_argument, NULL, 'S'},
Mike Galbraith9dd49982009-03-27 12:13:43 +01001105 {"vmlinux", required_argument, NULL, 'x'},
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001106 {"zero", no_argument, NULL, 'z'},
1107 {NULL, 0, NULL, 0 }
1108 };
Mike Galbraith9dd49982009-03-27 12:13:43 +01001109 int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001110 long_options, &option_index);
1111 if (c == -1)
1112 break;
1113
1114 switch (c) {
1115 case 'a': system_wide = 1; break;
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001116 case 'c': default_interval = atoi(optarg); break;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001117 case 'C':
1118 /* CPU and PID are mutually exclusive */
1119 if (tid != -1) {
1120 printf("WARNING: CPU switch overriding PID\n");
1121 sleep(1);
1122 tid = -1;
1123 }
1124 profile_cpu = atoi(optarg); break;
1125 case 'd': delay_secs = atoi(optarg); break;
1126 case 'D': dump_symtab = 1; break;
1127
1128 case 'e': error = parse_events(optarg); break;
1129
1130 case 'f': count_filter = atoi(optarg); break;
1131 case 'g': group = atoi(optarg); break;
1132 case 'h': display_help(); break;
Paul Mackerras31f004d2009-03-30 19:07:10 +02001133 case 'l': scale = 1; break;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001134 case 'n': nmi = atoi(optarg); break;
1135 case 'p':
1136 /* CPU and PID are mutually exclusive */
1137 if (profile_cpu != -1) {
1138 printf("WARNING: PID switch overriding CPU\n");
1139 sleep(1);
1140 profile_cpu = -1;
1141 }
1142 tid = atoi(optarg); break;
Mike Galbraith9dd49982009-03-27 12:13:43 +01001143 case 'r': realtime_prio = atoi(optarg); break;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001144 case 's': sym_filter = strdup(optarg); break;
1145 case 'S': run_perfstat = 1; break;
1146 case 'x': vmlinux = strdup(optarg); break;
1147 case 'z': zero = 1; break;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001148 case 'm': mmap_pages = atoi(optarg); break;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001149 case 'M': use_mmap = 1; break;
1150 case 'U': use_munmap = 1; break;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001151 default: error = 1; break;
1152 }
1153 }
1154 if (error)
1155 display_help();
1156
1157 if (!nr_counters) {
1158 if (run_perfstat)
1159 nr_counters = 8;
1160 else {
1161 nr_counters = 1;
1162 event_id[0] = 0;
1163 }
1164 }
1165
1166 for (counter = 0; counter < nr_counters; counter++) {
1167 if (event_count[counter])
1168 continue;
1169
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001170 event_count[counter] = default_interval;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001171 }
1172}
1173
/*
 * Reader-side bookkeeping for one counter's mmap()ed event buffer.
 */
struct mmap_data {
	int		counter;	/* counter index passed on to process_event() */
	void		*base;		/* start of the mapping; event data begins one page in */
	unsigned int	mask;		/* data area size in bytes minus one, used to wrap offsets */
	unsigned int	prev;		/* offset up to which events have been consumed */
};
1180
1181static unsigned int mmap_read_head(struct mmap_data *md)
1182{
1183 struct perf_counter_mmap_page *pc = md->base;
Peter Zijlstra19556432009-03-30 19:07:04 +02001184 int head;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001185
1186 head = pc->data_head;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001187 rmb();
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001188
1189 return head;
1190}
1191
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001192struct timeval last_read, this_read;
1193
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001194static void mmap_read(struct mmap_data *md)
1195{
1196 unsigned int head = mmap_read_head(md);
1197 unsigned int old = md->prev;
1198 unsigned char *data = md->base + page_size;
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001199 int diff;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001200
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001201 gettimeofday(&this_read, NULL);
1202
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001203 /*
1204 * If we're further behind than half the buffer, there's a chance
1205 * the writer will bite our tail and screw up the events under us.
1206 *
1207 * If we somehow ended up ahead of the head, we got messed up.
1208 *
1209 * In either case, truncate and restart at head.
1210 */
1211 diff = head - old;
1212 if (diff > md->mask / 2 || diff < 0) {
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001213 struct timeval iv;
1214 unsigned long msecs;
1215
1216 timersub(&this_read, &last_read, &iv);
1217 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
1218
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001219 fprintf(stderr, "WARNING: failed to keep up with mmap data."
1220 " Last read %lu msecs ago.\n", msecs);
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001221
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001222 /*
1223 * head points to a known good entry, start there.
1224 */
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001225 old = head;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001226 }
1227
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001228 last_read = this_read;
1229
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001230 for (; old != head;) {
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001231 struct ip_event {
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001232 struct perf_event_header header;
1233 __u64 ip;
1234 __u32 pid, tid;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001235 };
1236 struct mmap_event {
1237 struct perf_event_header header;
1238 __u32 pid, tid;
1239 __u64 start;
1240 __u64 len;
1241 __u64 pgoff;
1242 char filename[PATH_MAX];
1243 };
1244
1245 typedef union event_union {
1246 struct perf_event_header header;
1247 struct ip_event ip;
1248 struct mmap_event mmap;
1249 } event_t;
1250
1251 event_t *event = (event_t *)&data[old & md->mask];
1252
1253 event_t event_copy;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001254
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001255 unsigned int size = event->header.size;
1256
1257 /*
1258 * Event straddles the mmap boundary -- header should always
1259 * be inside due to u64 alignment of output.
1260 */
1261 if ((old & md->mask) + size != ((old + size) & md->mask)) {
1262 unsigned int offset = old;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001263 unsigned int len = min(sizeof(*event), size), cpy;
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001264 void *dst = &event_copy;
1265
1266 do {
1267 cpy = min(md->mask + 1 - (offset & md->mask), len);
1268 memcpy(dst, &data[offset & md->mask], cpy);
1269 offset += cpy;
1270 dst += cpy;
1271 len -= cpy;
1272 } while (len);
1273
1274 event = &event_copy;
1275 }
1276
1277 old += size;
1278
1279 switch (event->header.type) {
Peter Zijlstra023c54c2009-03-30 19:07:13 +02001280 case PERF_EVENT_OVERFLOW | __PERF_EVENT_IP:
1281 case PERF_EVENT_OVERFLOW | __PERF_EVENT_IP | __PERF_EVENT_TID:
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001282 process_event(event->ip.ip, md->counter);
1283 break;
1284
1285 case PERF_EVENT_MMAP:
1286 case PERF_EVENT_MUNMAP:
1287 printf("%s: %Lu %Lu %Lu %s\n",
1288 event->header.type == PERF_EVENT_MMAP
1289 ? "mmap" : "munmap",
1290 event->mmap.start,
1291 event->mmap.len,
1292 event->mmap.pgoff,
1293 event->mmap.filename);
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001294 break;
1295 }
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001296 }
1297
1298 md->prev = old;
1299}
1300
Ingo Molnare0143ba2009-03-23 21:29:59 +01001301int main(int argc, char *argv[])
1302{
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001303 struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001304 struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
Ingo Molnare0143ba2009-03-23 21:29:59 +01001305 struct perf_counter_hw_event hw_event;
Mike Galbraith9dd49982009-03-27 12:13:43 +01001306 pthread_t thread;
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001307 int i, counter, group_fd, nr_poll = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001308 unsigned int cpu;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001309 int ret;
1310
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001311 page_size = sysconf(_SC_PAGE_SIZE);
1312
Ingo Molnare0143ba2009-03-23 21:29:59 +01001313 process_options(argc, argv);
1314
1315 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001316 assert(nr_cpus <= MAX_NR_CPUS);
1317 assert(nr_cpus >= 0);
1318
1319 if (run_perfstat)
1320 return do_perfstat(argc, argv);
1321
Ingo Molnare0143ba2009-03-23 21:29:59 +01001322 if (tid != -1 || profile_cpu != -1)
1323 nr_cpus = 1;
1324
Paul Mackerrascbe46552009-03-24 16:52:34 +11001325 parse_symbols();
1326 if (vmlinux && sym_filter_entry)
1327 parse_vmlinux(vmlinux);
1328
Ingo Molnare0143ba2009-03-23 21:29:59 +01001329 for (i = 0; i < nr_cpus; i++) {
1330 group_fd = -1;
1331 for (counter = 0; counter < nr_counters; counter++) {
1332
1333 cpu = profile_cpu;
1334 if (tid == -1 && profile_cpu == -1)
1335 cpu = i;
1336
1337 memset(&hw_event, 0, sizeof(hw_event));
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001338 hw_event.config = event_id[counter];
Ingo Molnare0143ba2009-03-23 21:29:59 +01001339 hw_event.irq_period = event_count[counter];
1340 hw_event.record_type = PERF_RECORD_IRQ;
1341 hw_event.nmi = nmi;
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001342 hw_event.include_tid = 1;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001343 hw_event.mmap = use_mmap;
1344 hw_event.munmap = use_munmap;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001345
Ingo Molnare0143ba2009-03-23 21:29:59 +01001346 fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001347 if (fd[i][counter] < 0) {
Paul Mackerrascbe46552009-03-24 16:52:34 +11001348 int err = errno;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001349 printf("kerneltop error: syscall returned with %d (%s)\n",
Paul Mackerrascbe46552009-03-24 16:52:34 +11001350 fd[i][counter], strerror(err));
1351 if (err == EPERM)
Ingo Molnare0143ba2009-03-23 21:29:59 +01001352 printf("Are you root?\n");
1353 exit(-1);
1354 }
1355 assert(fd[i][counter] >= 0);
Paul Mackerrascbe46552009-03-24 16:52:34 +11001356 fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001357
1358 /*
1359 * First counter acts as the group leader:
1360 */
1361 if (group && group_fd == -1)
1362 group_fd = fd[i][counter];
1363
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001364 event_array[nr_poll].fd = fd[i][counter];
1365 event_array[nr_poll].events = POLLIN;
1366 nr_poll++;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001367
1368 mmap_array[i][counter].counter = counter;
1369 mmap_array[i][counter].prev = 0;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001370 mmap_array[i][counter].mask = mmap_pages*page_size - 1;
1371 mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001372 PROT_READ, MAP_SHARED, fd[i][counter], 0);
1373 if (mmap_array[i][counter].base == MAP_FAILED) {
1374 printf("kerneltop error: failed to mmap with %d (%s)\n",
1375 errno, strerror(errno));
1376 exit(-1);
1377 }
Ingo Molnare0143ba2009-03-23 21:29:59 +01001378 }
1379 }
1380
Mike Galbraith9dd49982009-03-27 12:13:43 +01001381 if (pthread_create(&thread, NULL, display_thread, NULL)) {
1382 printf("Could not create display thread.\n");
1383 exit(-1);
1384 }
1385
1386 if (realtime_prio) {
1387 struct sched_param param;
1388
1389 param.sched_priority = realtime_prio;
1390 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1391 printf("Could not set realtime priority.\n");
1392 exit(-1);
1393 }
1394 }
Ingo Molnare0143ba2009-03-23 21:29:59 +01001395
1396 while (1) {
1397 int hits = events;
1398
1399 for (i = 0; i < nr_cpus; i++) {
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001400 for (counter = 0; counter < nr_counters; counter++)
1401 mmap_read(&mmap_array[i][counter]);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001402 }
1403
Ingo Molnare0143ba2009-03-23 21:29:59 +01001404 if (hits == events)
Mike Galbraith9dd49982009-03-27 12:13:43 +01001405 ret = poll(event_array, nr_poll, 100);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001406 }
1407
1408 return 0;
1409}