blob: c0ca01504ff36deb6ee7e4ae9fb7f257cd3dfded [file] [log] [blame]
Ingo Molnare0143ba2009-03-23 21:29:59 +01001/*
2 * kerneltop.c: show top kernel functions - performance counters showcase
3
4 Build with:
5
Paul Mackerrascbe46552009-03-24 16:52:34 +11006 cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
Ingo Molnare0143ba2009-03-23 21:29:59 +01007
8 Sample output:
9
10------------------------------------------------------------------------------
11 KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2)
12------------------------------------------------------------------------------
13
14 weight RIP kernel function
15 ______ ________________ _______________
16
17 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
18 33.00 - ffffffff804cb740 : sock_alloc_send_skb
19 31.26 - ffffffff804ce808 : skb_push
20 22.43 - ffffffff80510004 : tcp_established_options
21 19.00 - ffffffff8027d250 : find_get_page
22 15.76 - ffffffff804e4fc9 : eth_type_trans
23 15.20 - ffffffff804d8baa : dst_release
24 14.86 - ffffffff804cf5d8 : skb_release_head_state
25 14.00 - ffffffff802217d5 : read_hpet
26 12.00 - ffffffff804ffb7f : __ip_local_out
27 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
28 8.54 - ffffffff805001a3 : ip_queue_xmit
Ingo Molnare0143ba2009-03-23 21:29:59 +010029 */
Wu Fengguangf7524bd2009-03-20 10:08:06 +080030
31/*
32 * perfstat: /usr/bin/time -alike performance counter statistics utility
33
34 It summarizes the counter events of all tasks (and child tasks),
35 covering all CPUs that the command (or workload) executes on.
36 It only counts the per-task events of the workload started,
37 independent of how many other tasks run on those CPUs.
38
39 Sample output:
40
41 $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
42
43 Performance counter stats for 'ls':
44
45 163516953 instructions
46 2295 cache-misses
47 2855182 branch-misses
48 */
49
50 /*
51 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
52 *
53 * Improvements and fixes by:
54 *
55 * Arjan van de Ven <arjan@linux.intel.com>
56 * Yanmin Zhang <yanmin.zhang@intel.com>
57 * Wu Fengguang <fengguang.wu@intel.com>
58 * Mike Galbraith <efault@gmx.de>
Paul Mackerrascbe46552009-03-24 16:52:34 +110059 * Paul Mackerras <paulus@samba.org>
Wu Fengguangf7524bd2009-03-20 10:08:06 +080060 *
61 * Released under the GPL v2. (and only v2, not any later version)
62 */
63
Ingo Molnare0143ba2009-03-23 21:29:59 +010064#define _GNU_SOURCE
65#include <sys/types.h>
66#include <sys/stat.h>
67#include <sys/time.h>
68#include <unistd.h>
69#include <stdint.h>
70#include <stdlib.h>
71#include <string.h>
Paul Mackerrascbe46552009-03-24 16:52:34 +110072#include <limits.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010073#include <getopt.h>
74#include <assert.h>
75#include <fcntl.h>
76#include <stdio.h>
77#include <errno.h>
78#include <ctype.h>
79#include <time.h>
80
Ingo Molnare0143ba2009-03-23 21:29:59 +010081#include <sys/syscall.h>
82#include <sys/ioctl.h>
83#include <sys/poll.h>
84#include <sys/prctl.h>
85#include <sys/wait.h>
86#include <sys/uio.h>
Peter Zijlstrabcbcb372009-03-23 18:22:12 +010087#include <sys/mman.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010088
89#include <linux/unistd.h>
Paul Mackerrascbe46552009-03-24 16:52:34 +110090#include <linux/types.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010091
Ingo Molnar383c5f82009-03-23 21:49:25 +010092#include "../../include/linux/perf_counter.h"
Ingo Molnare0143ba2009-03-23 21:29:59 +010093
Wu Fengguangf7524bd2009-03-20 10:08:06 +080094
Peter Zijlstra803d4f32009-03-23 18:22:11 +010095/*
96 * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
97 * counters in the current task.
98 */
99#define PR_TASK_PERF_COUNTERS_DISABLE 31
100#define PR_TASK_PERF_COUNTERS_ENABLE 32
101
102#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
103
104#define rdclock() \
105({ \
106 struct timespec ts; \
107 \
108 clock_gettime(CLOCK_MONOTONIC, &ts); \
109 ts.tv_sec * 1000000000ULL + ts.tv_nsec; \
110})
111
112/*
113 * Pick up some kernel type conventions:
114 */
115#define __user
116#define asmlinkage
117
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100118#ifdef __x86_64__
Peter Zijlstrabcbcb372009-03-23 18:22:12 +0100119#define __NR_perf_counter_open 295
120#define rmb() asm volatile("lfence" ::: "memory")
121#define cpu_relax() asm volatile("rep; nop" ::: "memory");
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100122#endif
123
124#ifdef __i386__
Peter Zijlstrabcbcb372009-03-23 18:22:12 +0100125#define __NR_perf_counter_open 333
126#define rmb() asm volatile("lfence" ::: "memory")
127#define cpu_relax() asm volatile("rep; nop" ::: "memory");
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100128#endif
129
130#ifdef __powerpc__
131#define __NR_perf_counter_open 319
Peter Zijlstrabcbcb372009-03-23 18:22:12 +0100132#define rmb() asm volatile ("sync" ::: "memory")
133#define cpu_relax() asm volatile ("" ::: "memory");
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100134#endif
135
Peter Zijlstrabcbcb372009-03-23 18:22:12 +0100136#define unlikely(x) __builtin_expect(!!(x), 0)
Peter Zijlstra00f0ad72009-03-25 12:30:27 +0100137#define min(x, y) ({ \
138 typeof(x) _min1 = (x); \
139 typeof(y) _min2 = (y); \
140 (void) (&_min1 == &_min2); \
141 _min1 < _min2 ? _min1 : _min2; })
Peter Zijlstrabcbcb372009-03-23 18:22:12 +0100142
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100143asmlinkage int sys_perf_counter_open(
144 struct perf_counter_hw_event *hw_event_uptr __user,
145 pid_t pid,
146 int cpu,
147 int group_fd,
148 unsigned long flags)
149{
Paul Mackerrascbe46552009-03-24 16:52:34 +1100150 return syscall(
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100151 __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100152}
153
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800154#define MAX_COUNTERS 64
155#define MAX_NR_CPUS 256
156
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100157#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800158
159static int run_perfstat = 0;
160static int system_wide = 0;
161
162static int nr_counters = 0;
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100163static __u64 event_id[MAX_COUNTERS] = {
164 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
Paul Mackerrascbe46552009-03-24 16:52:34 +1100165 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100166 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
167 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
168
169 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
170 EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
171 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
172 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
173};
174static int default_interval = 100000;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800175static int event_count[MAX_COUNTERS];
176static int fd[MAX_NR_CPUS][MAX_COUNTERS];
Ingo Molnare0143ba2009-03-23 21:29:59 +0100177
Ingo Molnare0143ba2009-03-23 21:29:59 +0100178static __u64 count_filter = 100;
179
Ingo Molnare0143ba2009-03-23 21:29:59 +0100180static int tid = -1;
181static int profile_cpu = -1;
182static int nr_cpus = 0;
183static int nmi = 1;
184static int group = 0;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +0100185static unsigned int page_size;
Peter Zijlstra00f0ad72009-03-25 12:30:27 +0100186static unsigned int mmap_pages = 16;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +0200187static int use_mmap = 0;
188static int use_munmap = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100189
190static char *vmlinux;
191
192static char *sym_filter;
193static unsigned long filter_start;
194static unsigned long filter_end;
195
196static int delay_secs = 2;
197static int zero;
198static int dump_symtab;
199
Paul Mackerras31f004d2009-03-30 19:07:10 +0200200static int scale;
201
/*
 * One line of objdump output for the annotated symbol, kept in a singly
 * linked list in the order the lines were read.
 */
struct source_line {
	uint64_t		EIP;	/* address parsed from the line, 0 if none was found */
	unsigned long		count;	/* number of samples that hit exactly this address */
	char			*line;	/* text of the line, trailing newline stripped */
	struct source_line	*next;	/* following line, NULL at the end of the list */
};
208
Paul Mackerrascbe46552009-03-24 16:52:34 +1100209static struct source_line *lines;
210static struct source_line **lines_tail;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800211
212const unsigned int default_count[] = {
Ingo Molnar81cdbe02009-03-23 22:29:50 +0100213 1000000,
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800214 1000000,
215 10000,
216 10000,
217 1000000,
218 10000,
219};
220
221static char *hw_event_names[] = {
222 "CPU cycles",
223 "instructions",
224 "cache references",
225 "cache misses",
226 "branches",
227 "branch misses",
228 "bus cycles",
229};
230
231static char *sw_event_names[] = {
232 "cpu clock ticks",
233 "task clock ticks",
234 "pagefaults",
235 "context switches",
236 "CPU migrations",
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100237 "minor faults",
238 "major faults",
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800239};
240
241struct event_symbol {
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100242 __u64 event;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800243 char *symbol;
244};
245
246static struct event_symbol event_symbols[] = {
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100247 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
248 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
249 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
250 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
251 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
252 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
253 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
254 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
255 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },
256
257 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
258 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
259 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
260 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
261 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
262 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
263 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
264 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
265 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
266 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800267};
268
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100269#define __PERF_COUNTER_FIELD(config, name) \
270 ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
271
272#define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW)
273#define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG)
274#define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE)
275#define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT)
276
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800277static void display_events_help(void)
278{
279 unsigned int i;
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100280 __u64 e;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800281
282 printf(
283 " -e EVENT --event=EVENT # symbolic-name abbreviations");
284
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100285 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
286 int type, id;
287
288 e = event_symbols[i].event;
289 type = PERF_COUNTER_TYPE(e);
290 id = PERF_COUNTER_ID(e);
291
292 printf("\n %d:%d: %-20s",
293 type, id, event_symbols[i].symbol);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800294 }
295
296 printf("\n"
297 " rNNN: raw PMU events (eventsel+umask)\n\n");
298}
299
300static void display_perfstat_help(void)
301{
302 printf(
303 "Usage: perfstat [<events...>] <cmd...>\n\n"
304 "PerfStat Options (up to %d event types can be specified):\n\n",
305 MAX_COUNTERS);
306
307 display_events_help();
308
309 printf(
Paul Mackerras31f004d2009-03-30 19:07:10 +0200310 " -l # scale counter values\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800311 " -a # system-wide collection\n");
312 exit(0);
313}
Ingo Molnare0143ba2009-03-23 21:29:59 +0100314
315static void display_help(void)
316{
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800317 if (run_perfstat)
318 return display_perfstat_help();
319
Ingo Molnare0143ba2009-03-23 21:29:59 +0100320 printf(
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800321 "Usage: kerneltop [<options>]\n"
322 " Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100323 "KernelTop Options (up to %d event types can be specified at once):\n\n",
324 MAX_COUNTERS);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800325
326 display_events_help();
327
Ingo Molnare0143ba2009-03-23 21:29:59 +0100328 printf(
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800329 " -S --stat # perfstat COMMAND\n"
330 " -a # system-wide collection (for perfstat)\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100331 " -c CNT --count=CNT # event period to sample\n\n"
332 " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n"
333 " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n"
Paul Mackerras31f004d2009-03-30 19:07:10 +0200334 " -l # show scale factor for RR events\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100335 " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800336 " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100337 " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800338 " -x path --vmlinux=<path> # the vmlinux binary, required for -s use\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100339 " -z --zero # zero counts after display\n"
340 " -D --dump_symtab # dump symbol table to stderr on startup\n"
Peter Zijlstra4c4ba212009-03-25 12:30:26 +0100341 " -m pages --mmap_pages=<pages> # number of mmap data pages\n"
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +0200342 " -M --mmap_info # print mmap info stream\n"
343 " -U --munmap_info # print munmap info stream\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800344 );
Ingo Molnare0143ba2009-03-23 21:29:59 +0100345
346 exit(0);
347}
348
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800349static char *event_name(int ctr)
350{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100351 __u64 config = event_id[ctr];
352 int type = PERF_COUNTER_TYPE(config);
353 int id = PERF_COUNTER_ID(config);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800354 static char buf[32];
355
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100356 if (PERF_COUNTER_RAW(config)) {
357 sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800358 return buf;
359 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800360
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100361 switch (type) {
362 case PERF_TYPE_HARDWARE:
363 if (id < PERF_HW_EVENTS_MAX)
364 return hw_event_names[id];
365 return "unknown-hardware";
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800366
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100367 case PERF_TYPE_SOFTWARE:
368 if (id < PERF_SW_EVENTS_MAX)
369 return sw_event_names[id];
370 return "unknown-software";
371
372 default:
373 break;
374 }
375
376 return "unknown";
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800377}
378
379/*
380 * Each event can have multiple symbolic names.
381 * Symbolic names are (almost) exactly matched.
382 */
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100383static __u64 match_event_symbols(char *str)
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800384{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100385 __u64 config, id;
386 int type;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800387 unsigned int i;
388
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100389 if (sscanf(str, "r%llx", &config) == 1)
390 return config | PERF_COUNTER_RAW_MASK;
391
392 if (sscanf(str, "%d:%llu", &type, &id) == 2)
393 return EID(type, id);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800394
395 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
396 if (!strncmp(str, event_symbols[i].symbol,
397 strlen(event_symbols[i].symbol)))
398 return event_symbols[i].event;
399 }
400
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100401 return ~0ULL;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800402}
403
404static int parse_events(char *str)
405{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100406 __u64 config;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800407
408again:
409 if (nr_counters == MAX_COUNTERS)
410 return -1;
411
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100412 config = match_event_symbols(str);
413 if (config == ~0ULL)
414 return -1;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800415
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100416 event_id[nr_counters] = config;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800417 nr_counters++;
418
419 str = strstr(str, ",");
420 if (str) {
421 str++;
422 goto again;
423 }
424
425 return 0;
426}
427
428
429/*
430 * perfstat
431 */
432
433char fault_here[1000000];
434
435static void create_perfstat_counter(int counter)
436{
437 struct perf_counter_hw_event hw_event;
438
439 memset(&hw_event, 0, sizeof(hw_event));
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100440 hw_event.config = event_id[counter];
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800441 hw_event.record_type = PERF_RECORD_SIMPLE;
442 hw_event.nmi = 0;
Paul Mackerras31f004d2009-03-30 19:07:10 +0200443 if (scale)
444 hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
445 PERF_FORMAT_TOTAL_TIME_RUNNING;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800446
447 if (system_wide) {
448 int cpu;
449 for (cpu = 0; cpu < nr_cpus; cpu ++) {
450 fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
451 if (fd[cpu][counter] < 0) {
452 printf("perfstat error: syscall returned with %d (%s)\n",
453 fd[cpu][counter], strerror(errno));
454 exit(-1);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100455 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800456 }
457 } else {
458 hw_event.inherit = 1;
459 hw_event.disabled = 1;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100460
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800461 fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
462 if (fd[0][counter] < 0) {
463 printf("perfstat error: syscall returned with %d (%s)\n",
464 fd[0][counter], strerror(errno));
465 exit(-1);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100466 }
467 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800468}
Ingo Molnare0143ba2009-03-23 21:29:59 +0100469
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800470int do_perfstat(int argc, char *argv[])
471{
472 unsigned long long t0, t1;
473 int counter;
474 ssize_t res;
475 int status;
476 int pid;
477
478 if (!system_wide)
479 nr_cpus = 1;
480
481 for (counter = 0; counter < nr_counters; counter++)
482 create_perfstat_counter(counter);
483
484 argc -= optind;
485 argv += optind;
486
Wu Fengguangaf9522c2009-03-20 10:08:10 +0800487 if (!argc)
488 display_help();
489
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800490 /*
491 * Enable counters and exec the command:
492 */
493 t0 = rdclock();
494 prctl(PR_TASK_PERF_COUNTERS_ENABLE);
495
496 if ((pid = fork()) < 0)
497 perror("failed to fork");
498 if (!pid) {
499 if (execvp(argv[0], argv)) {
500 perror(argv[0]);
501 exit(-1);
502 }
Wu Fengguang95bb3be2009-03-20 10:08:04 +0800503 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800504 while (wait(&status) >= 0)
505 ;
506 prctl(PR_TASK_PERF_COUNTERS_DISABLE);
507 t1 = rdclock();
508
509 fflush(stdout);
510
511 fprintf(stderr, "\n");
512 fprintf(stderr, " Performance counter stats for \'%s\':\n",
513 argv[0]);
514 fprintf(stderr, "\n");
Ingo Molnare0143ba2009-03-23 21:29:59 +0100515
516 for (counter = 0; counter < nr_counters; counter++) {
Paul Mackerras31f004d2009-03-30 19:07:10 +0200517 int cpu, nv;
518 __u64 count[3], single_count[3];
519 int scaled;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100520
Paul Mackerras31f004d2009-03-30 19:07:10 +0200521 count[0] = count[1] = count[2] = 0;
522 nv = scale ? 3 : 1;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800523 for (cpu = 0; cpu < nr_cpus; cpu ++) {
524 res = read(fd[cpu][counter],
Paul Mackerras31f004d2009-03-30 19:07:10 +0200525 single_count, nv * sizeof(__u64));
526 assert(res == nv * sizeof(__u64));
527
528 count[0] += single_count[0];
529 if (scale) {
530 count[1] += single_count[1];
531 count[2] += single_count[2];
532 }
533 }
534
535 scaled = 0;
536 if (scale) {
537 if (count[2] == 0) {
538 fprintf(stderr, " %14s %-20s\n",
539 "<not counted>", event_name(counter));
540 continue;
541 }
542 if (count[2] < count[1]) {
543 scaled = 1;
544 count[0] = (unsigned long long)
545 ((double)count[0] * count[1] / count[2] + 0.5);
546 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800547 }
548
Paul Mackerrascbe46552009-03-24 16:52:34 +1100549 if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
550 event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800551
Paul Mackerras31f004d2009-03-30 19:07:10 +0200552 double msecs = (double)count[0] / 1000000;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800553
Paul Mackerras31f004d2009-03-30 19:07:10 +0200554 fprintf(stderr, " %14.6f %-20s (msecs)",
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800555 msecs, event_name(counter));
556 } else {
Paul Mackerras31f004d2009-03-30 19:07:10 +0200557 fprintf(stderr, " %14Ld %-20s (events)",
558 count[0], event_name(counter));
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800559 }
Paul Mackerras31f004d2009-03-30 19:07:10 +0200560 if (scaled)
561 fprintf(stderr, " (scaled from %.2f%%)",
562 (double) count[2] / count[1] * 100);
563 fprintf(stderr, "\n");
Ingo Molnare0143ba2009-03-23 21:29:59 +0100564 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800565 fprintf(stderr, "\n");
566 fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
567 (double)(t1-t0)/1e6);
568 fprintf(stderr, "\n");
569
570 return 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100571}
572
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800573/*
574 * Symbols
575 */
576
Ingo Molnare0143ba2009-03-23 21:29:59 +0100577static uint64_t min_ip;
578static uint64_t max_ip = -1ll;
579
580struct sym_entry {
581 unsigned long long addr;
582 char *sym;
583 unsigned long count[MAX_COUNTERS];
584 int skip;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100585 struct source_line *source;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100586};
587
588#define MAX_SYMS 100000
589
590static int sym_table_count;
591
592struct sym_entry *sym_filter_entry;
593
594static struct sym_entry sym_table[MAX_SYMS];
595
596static void show_details(struct sym_entry *sym);
597
598/*
Wu Fengguangef45fa92009-03-20 10:08:07 +0800599 * Ordering weight: count-1 * count-2 * ... / count-n
Ingo Molnare0143ba2009-03-23 21:29:59 +0100600 */
601static double sym_weight(const struct sym_entry *sym)
602{
603 double weight;
604 int counter;
605
606 weight = sym->count[0];
607
608 for (counter = 1; counter < nr_counters-1; counter++)
609 weight *= sym->count[counter];
610
611 weight /= (sym->count[counter] + 1);
612
613 return weight;
614}
615
/*
 * qsort() comparator ordering sym_entry records by descending weight.
 *
 * Returns a negative, zero or positive value as qsort() requires; a
 * plain "a < b" result (only 0 or 1) is not a valid comparator and
 * leaves the resulting order unspecified.
 */
static int compare(const void *__sym1, const void *__sym2)
{
	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
	double w1 = sym_weight(sym1);
	double w2 = sym_weight(sym2);

	/* heavier entries sort first */
	return (w1 < w2) - (w1 > w2);
}
622
623static time_t last_refresh;
624static long events;
625static long userspace_events;
626static const char CONSOLE_CLEAR[] = "";
627
628static struct sym_entry tmp[MAX_SYMS];
629
630static void print_sym_table(void)
631{
632 int i, printed;
633 int counter;
634 float events_per_sec = events/delay_secs;
635 float kevents_per_sec = (events-userspace_events)/delay_secs;
636
637 memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count);
638 qsort(tmp, sym_table_count, sizeof(tmp[0]), compare);
639
640 write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
641
642 printf(
643"------------------------------------------------------------------------------\n");
644 printf( " KernelTop:%8.0f irqs/sec kernel:%3.1f%% [%s, ",
645 events_per_sec,
646 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)),
647 nmi ? "NMI" : "IRQ");
648
649 if (nr_counters == 1)
650 printf("%d ", event_count[0]);
651
652 for (counter = 0; counter < nr_counters; counter++) {
653 if (counter)
654 printf("/");
655
Wu Fengguange3908612009-03-20 10:08:05 +0800656 printf("%s", event_name(counter));
Ingo Molnare0143ba2009-03-23 21:29:59 +0100657 }
658
659 printf( "], ");
660
661 if (tid != -1)
662 printf(" (tid: %d", tid);
663 else
664 printf(" (all");
665
666 if (profile_cpu != -1)
667 printf(", cpu: %d)\n", profile_cpu);
668 else {
669 if (tid != -1)
670 printf(")\n");
671 else
672 printf(", %d CPUs)\n", nr_cpus);
673 }
674
675 printf("------------------------------------------------------------------------------\n\n");
676
677 if (nr_counters == 1)
678 printf(" events");
679 else
680 printf(" weight events");
681
682 printf(" RIP kernel function\n"
683 " ______ ______ ________________ _______________\n\n"
684 );
685
686 printed = 0;
687 for (i = 0; i < sym_table_count; i++) {
688 int count;
689
690 if (nr_counters == 1) {
691 if (printed <= 18 &&
692 tmp[i].count[0] >= count_filter) {
693 printf("%19.2f - %016llx : %s\n",
694 sym_weight(tmp + i), tmp[i].addr, tmp[i].sym);
695 printed++;
696 }
697 } else {
698 if (printed <= 18 &&
699 tmp[i].count[0] >= count_filter) {
700 printf("%8.1f %10ld - %016llx : %s\n",
701 sym_weight(tmp + i),
702 tmp[i].count[0],
703 tmp[i].addr, tmp[i].sym);
704 printed++;
705 }
706 }
707 /*
708 * Add decay to the counts:
709 */
710 for (count = 0; count < nr_counters; count++)
711 sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8;
712 }
713
714 if (sym_filter_entry)
715 show_details(sym_filter_entry);
716
717 last_refresh = time(NULL);
718
719 {
720 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
721
722 if (poll(&stdin_poll, 1, 0) == 1) {
723 printf("key pressed - exiting.\n");
724 exit(0);
725 }
726 }
727}
728
729static int read_symbol(FILE *in, struct sym_entry *s)
730{
731 static int filter_match = 0;
732 char *sym, stype;
733 char str[500];
734 int rc, pos;
735
736 rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str);
737 if (rc == EOF)
738 return -1;
739
740 assert(rc == 3);
741
742 /* skip until end of line: */
743 pos = strlen(str);
744 do {
745 rc = fgetc(in);
746 if (rc == '\n' || rc == EOF || pos >= 499)
747 break;
748 str[pos] = rc;
749 pos++;
750 } while (1);
751 str[pos] = 0;
752
753 sym = str;
754
755 /* Filter out known duplicates and non-text symbols. */
756 if (!strcmp(sym, "_text"))
757 return 1;
758 if (!min_ip && !strcmp(sym, "_stext"))
759 return 1;
760 if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext"))
761 return 1;
762 if (stype != 'T' && stype != 't')
763 return 1;
764 if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14))
765 return 1;
766 if (strstr(sym, "_text_start") || strstr(sym, "_text_end"))
767 return 1;
768
769 s->sym = malloc(strlen(str));
770 assert(s->sym);
771
772 strcpy((char *)s->sym, str);
773 s->skip = 0;
774
775 /* Tag events to be skipped. */
776 if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
777 s->skip = 1;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +0100778 else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
779 s->skip = 1;
780 else if (!strcmp("mwait_idle", s->sym))
Ingo Molnare0143ba2009-03-23 21:29:59 +0100781 s->skip = 1;
782
783 if (filter_match == 1) {
784 filter_end = s->addr;
785 filter_match = -1;
786 if (filter_end - filter_start > 10000) {
787 printf("hm, too large filter symbol <%s> - skipping.\n",
788 sym_filter);
789 printf("symbol filter start: %016lx\n", filter_start);
790 printf(" end: %016lx\n", filter_end);
791 filter_end = filter_start = 0;
792 sym_filter = NULL;
793 sleep(1);
794 }
795 }
796 if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) {
797 filter_match = 1;
798 filter_start = s->addr;
799 }
800
801 return 0;
802}
803
804int compare_addr(const void *__sym1, const void *__sym2)
805{
806 const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
807
808 return sym1->addr > sym2->addr;
809}
810
811static void sort_symbol_table(void)
812{
813 int i, dups;
814
815 do {
816 qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr);
817 for (i = 0, dups = 0; i < sym_table_count; i++) {
818 if (sym_table[i].addr == sym_table[i+1].addr) {
819 sym_table[i+1].addr = -1ll;
820 dups++;
821 }
822 }
823 sym_table_count -= dups;
824 } while(dups);
825}
826
827static void parse_symbols(void)
828{
829 struct sym_entry *last;
830
831 FILE *kallsyms = fopen("/proc/kallsyms", "r");
832
833 if (!kallsyms) {
834 printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n");
835 exit(-1);
836 }
837
838 while (!feof(kallsyms)) {
839 if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) {
840 sym_table_count++;
841 assert(sym_table_count <= MAX_SYMS);
842 }
843 }
844
845 sort_symbol_table();
846 min_ip = sym_table[0].addr;
847 max_ip = sym_table[sym_table_count-1].addr;
848 last = sym_table + sym_table_count++;
849
850 last->addr = -1ll;
851 last->sym = "<end>";
852
853 if (filter_end) {
854 int count;
855 for (count=0; count < sym_table_count; count ++) {
856 if (!strcmp(sym_table[count].sym, sym_filter)) {
857 sym_filter_entry = &sym_table[count];
858 break;
859 }
860 }
861 }
862 if (dump_symtab) {
863 int i;
864
865 for (i = 0; i < sym_table_count; i++)
866 fprintf(stderr, "%llx %s\n",
867 sym_table[i].addr, sym_table[i].sym);
868 }
869}
870
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800871/*
872 * Source lines
873 */
Ingo Molnare0143ba2009-03-23 21:29:59 +0100874
875static void parse_vmlinux(char *filename)
876{
877 FILE *file;
878 char command[PATH_MAX*2];
879 if (!filename)
880 return;
881
882 sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename);
883
884 file = popen(command, "r");
885 if (!file)
886 return;
887
Paul Mackerrascbe46552009-03-24 16:52:34 +1100888 lines_tail = &lines;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100889 while (!feof(file)) {
890 struct source_line *src;
891 size_t dummy = 0;
892 char *c;
893
894 src = malloc(sizeof(struct source_line));
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800895 assert(src != NULL);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100896 memset(src, 0, sizeof(struct source_line));
897
898 if (getline(&src->line, &dummy, file) < 0)
899 break;
900 if (!src->line)
901 break;
902
903 c = strchr(src->line, '\n');
904 if (c)
905 *c = 0;
906
Paul Mackerrascbe46552009-03-24 16:52:34 +1100907 src->next = NULL;
908 *lines_tail = src;
909 lines_tail = &src->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100910
911 if (strlen(src->line)>8 && src->line[8] == ':')
912 src->EIP = strtoull(src->line, NULL, 16);
913 if (strlen(src->line)>8 && src->line[16] == ':')
914 src->EIP = strtoull(src->line, NULL, 16);
915 }
916 pclose(file);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100917}
918
919static void record_precise_ip(uint64_t ip)
920{
921 struct source_line *line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100922
Paul Mackerrascbe46552009-03-24 16:52:34 +1100923 for (line = lines; line; line = line->next) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100924 if (line->EIP == ip)
925 line->count++;
926 if (line->EIP > ip)
927 break;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100928 }
929}
930
931static void lookup_sym_in_vmlinux(struct sym_entry *sym)
932{
933 struct source_line *line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100934 char pattern[PATH_MAX];
935 sprintf(pattern, "<%s>:", sym->sym);
936
Paul Mackerrascbe46552009-03-24 16:52:34 +1100937 for (line = lines; line; line = line->next) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100938 if (strstr(line->line, pattern)) {
Paul Mackerrascbe46552009-03-24 16:52:34 +1100939 sym->source = line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100940 break;
941 }
Ingo Molnare0143ba2009-03-23 21:29:59 +0100942 }
943}
944
Paul Mackerrascbe46552009-03-24 16:52:34 +1100945static void show_lines(struct source_line *line_queue, int line_queue_count)
Ingo Molnare0143ba2009-03-23 21:29:59 +0100946{
947 int i;
948 struct source_line *line;
949
Paul Mackerrascbe46552009-03-24 16:52:34 +1100950 line = line_queue;
951 for (i = 0; i < line_queue_count; i++) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100952 printf("%8li\t%s\n", line->count, line->line);
Paul Mackerrascbe46552009-03-24 16:52:34 +1100953 line = line->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100954 }
955}
956
957#define TRACE_COUNT 3
958
959static void show_details(struct sym_entry *sym)
960{
961 struct source_line *line;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100962 struct source_line *line_queue = NULL;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100963 int displayed = 0;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100964 int line_queue_count = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100965
966 if (!sym->source)
967 lookup_sym_in_vmlinux(sym);
968 if (!sym->source)
969 return;
970
971 printf("Showing details for %s\n", sym->sym);
972
Paul Mackerrascbe46552009-03-24 16:52:34 +1100973 line = sym->source;
974 while (line) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100975 if (displayed && strstr(line->line, ">:"))
976 break;
977
Paul Mackerrascbe46552009-03-24 16:52:34 +1100978 if (!line_queue_count)
979 line_queue = line;
980 line_queue_count ++;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100981
982 if (line->count >= count_filter) {
Paul Mackerrascbe46552009-03-24 16:52:34 +1100983 show_lines(line_queue, line_queue_count);
984 line_queue_count = 0;
985 line_queue = NULL;
986 } else if (line_queue_count > TRACE_COUNT) {
987 line_queue = line_queue->next;
988 line_queue_count --;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100989 }
990
991 line->count = 0;
992 displayed++;
993 if (displayed > 300)
994 break;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100995 line = line->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100996 }
997}
998
999/*
1000 * Binary search in the histogram table and record the hit:
1001 */
1002static void record_ip(uint64_t ip, int counter)
1003{
1004 int left_idx, middle_idx, right_idx, idx;
1005 unsigned long left, middle, right;
1006
1007 record_precise_ip(ip);
1008
1009 left_idx = 0;
1010 right_idx = sym_table_count-1;
1011 assert(ip <= max_ip && ip >= min_ip);
1012
1013 while (left_idx + 1 < right_idx) {
1014 middle_idx = (left_idx + right_idx) / 2;
1015
1016 left = sym_table[ left_idx].addr;
1017 middle = sym_table[middle_idx].addr;
1018 right = sym_table[ right_idx].addr;
1019
1020 if (!(left <= middle && middle <= right)) {
1021 printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right);
1022 printf("%d %d %d\n", left_idx, middle_idx, right_idx);
1023 }
1024 assert(left <= middle && middle <= right);
1025 if (!(left <= ip && ip <= right)) {
1026 printf(" left: %016lx\n", left);
Ingo Molnar193e8df2009-03-23 22:23:16 +01001027 printf(" ip: %016lx\n", (unsigned long)ip);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001028 printf("right: %016lx\n", right);
1029 }
1030 assert(left <= ip && ip <= right);
1031 /*
1032 * [ left .... target .... middle .... right ]
1033 * => right := middle
1034 */
1035 if (ip < middle) {
1036 right_idx = middle_idx;
1037 continue;
1038 }
1039 /*
1040 * [ left .... middle ... target ... right ]
1041 * => left := middle
1042 */
1043 left_idx = middle_idx;
1044 }
1045
1046 idx = left_idx;
1047
1048 if (!sym_table[idx].skip)
1049 sym_table[idx].count[counter]++;
1050 else events--;
1051}
1052
1053static void process_event(uint64_t ip, int counter)
1054{
1055 events++;
1056
1057 if (ip < min_ip || ip > max_ip) {
1058 userspace_events++;
1059 return;
1060 }
1061
1062 record_ip(ip, counter);
1063}
1064
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001065static void process_options(int argc, char *argv[])
1066{
1067 int error = 0, counter;
1068
1069 if (strstr(argv[0], "perfstat"))
1070 run_perfstat = 1;
1071
1072 for (;;) {
1073 int option_index = 0;
1074 /** Options for getopt */
1075 static struct option long_options[] = {
1076 {"count", required_argument, NULL, 'c'},
1077 {"cpu", required_argument, NULL, 'C'},
1078 {"delay", required_argument, NULL, 'd'},
1079 {"dump_symtab", no_argument, NULL, 'D'},
1080 {"event", required_argument, NULL, 'e'},
1081 {"filter", required_argument, NULL, 'f'},
1082 {"group", required_argument, NULL, 'g'},
1083 {"help", no_argument, NULL, 'h'},
Paul Mackerras31f004d2009-03-30 19:07:10 +02001084 {"scale", no_argument, NULL, 'l'},
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001085 {"nmi", required_argument, NULL, 'n'},
1086 {"pid", required_argument, NULL, 'p'},
1087 {"vmlinux", required_argument, NULL, 'x'},
1088 {"symbol", required_argument, NULL, 's'},
1089 {"stat", no_argument, NULL, 'S'},
1090 {"zero", no_argument, NULL, 'z'},
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001091 {"mmap_pages", required_argument, NULL, 'm'},
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001092 {"mmap_info", no_argument, NULL, 'M'},
1093 {"munmap_info", no_argument, NULL, 'U'},
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001094 {NULL, 0, NULL, 0 }
1095 };
Paul Mackerras31f004d2009-03-30 19:07:10 +02001096 int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:s:Sx:zMU",
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001097 long_options, &option_index);
1098 if (c == -1)
1099 break;
1100
1101 switch (c) {
1102 case 'a': system_wide = 1; break;
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001103 case 'c': default_interval = atoi(optarg); break;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001104 case 'C':
1105 /* CPU and PID are mutually exclusive */
1106 if (tid != -1) {
1107 printf("WARNING: CPU switch overriding PID\n");
1108 sleep(1);
1109 tid = -1;
1110 }
1111 profile_cpu = atoi(optarg); break;
1112 case 'd': delay_secs = atoi(optarg); break;
1113 case 'D': dump_symtab = 1; break;
1114
1115 case 'e': error = parse_events(optarg); break;
1116
1117 case 'f': count_filter = atoi(optarg); break;
1118 case 'g': group = atoi(optarg); break;
1119 case 'h': display_help(); break;
Paul Mackerras31f004d2009-03-30 19:07:10 +02001120 case 'l': scale = 1; break;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001121 case 'n': nmi = atoi(optarg); break;
1122 case 'p':
1123 /* CPU and PID are mutually exclusive */
1124 if (profile_cpu != -1) {
1125 printf("WARNING: PID switch overriding CPU\n");
1126 sleep(1);
1127 profile_cpu = -1;
1128 }
1129 tid = atoi(optarg); break;
1130 case 's': sym_filter = strdup(optarg); break;
1131 case 'S': run_perfstat = 1; break;
1132 case 'x': vmlinux = strdup(optarg); break;
1133 case 'z': zero = 1; break;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001134 case 'm': mmap_pages = atoi(optarg); break;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001135 case 'M': use_mmap = 1; break;
1136 case 'U': use_munmap = 1; break;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001137 default: error = 1; break;
1138 }
1139 }
1140 if (error)
1141 display_help();
1142
1143 if (!nr_counters) {
1144 if (run_perfstat)
1145 nr_counters = 8;
1146 else {
1147 nr_counters = 1;
1148 event_id[0] = 0;
1149 }
1150 }
1151
1152 for (counter = 0; counter < nr_counters; counter++) {
1153 if (event_count[counter])
1154 continue;
1155
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001156 event_count[counter] = default_interval;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001157 }
1158}
1159
/*
 * Reader-side state for one mmap()ed event buffer.
 */
struct mmap_data {
	int		counter;	/* counter index passed on to process_event() */
	void		*base;		/* start of the mapping; event data begins one page in */
	unsigned int	mask;		/* size of the data area in bytes, minus one */
	unsigned int	prev;		/* read offset reached by the previous mmap_read() */
};
1166
1167static unsigned int mmap_read_head(struct mmap_data *md)
1168{
1169 struct perf_counter_mmap_page *pc = md->base;
Peter Zijlstra19556432009-03-30 19:07:04 +02001170 int head;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001171
1172 head = pc->data_head;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001173 rmb();
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001174
1175 return head;
1176}
1177
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001178struct timeval last_read, this_read;
1179
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001180static void mmap_read(struct mmap_data *md)
1181{
1182 unsigned int head = mmap_read_head(md);
1183 unsigned int old = md->prev;
1184 unsigned char *data = md->base + page_size;
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001185 int diff;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001186
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001187 gettimeofday(&this_read, NULL);
1188
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001189 /*
1190 * If we're further behind than half the buffer, there's a chance
1191 * the writer will bite our tail and screw up the events under us.
1192 *
1193 * If we somehow ended up ahead of the head, we got messed up.
1194 *
1195 * In either case, truncate and restart at head.
1196 */
1197 diff = head - old;
1198 if (diff > md->mask / 2 || diff < 0) {
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001199 struct timeval iv;
1200 unsigned long msecs;
1201
1202 timersub(&this_read, &last_read, &iv);
1203 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
1204
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001205 fprintf(stderr, "WARNING: failed to keep up with mmap data."
1206 " Last read %lu msecs ago.\n", msecs);
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001207
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001208 /*
1209 * head points to a known good entry, start there.
1210 */
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001211 old = head;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001212 }
1213
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001214 last_read = this_read;
1215
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001216 for (; old != head;) {
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001217 struct ip_event {
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001218 struct perf_event_header header;
1219 __u64 ip;
1220 __u32 pid, tid;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001221 };
1222 struct mmap_event {
1223 struct perf_event_header header;
1224 __u32 pid, tid;
1225 __u64 start;
1226 __u64 len;
1227 __u64 pgoff;
1228 char filename[PATH_MAX];
1229 };
1230
1231 typedef union event_union {
1232 struct perf_event_header header;
1233 struct ip_event ip;
1234 struct mmap_event mmap;
1235 } event_t;
1236
1237 event_t *event = (event_t *)&data[old & md->mask];
1238
1239 event_t event_copy;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001240
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001241 unsigned int size = event->header.size;
1242
1243 /*
1244 * Event straddles the mmap boundary -- header should always
1245 * be inside due to u64 alignment of output.
1246 */
1247 if ((old & md->mask) + size != ((old + size) & md->mask)) {
1248 unsigned int offset = old;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001249 unsigned int len = min(sizeof(*event), size), cpy;
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001250 void *dst = &event_copy;
1251
1252 do {
1253 cpy = min(md->mask + 1 - (offset & md->mask), len);
1254 memcpy(dst, &data[offset & md->mask], cpy);
1255 offset += cpy;
1256 dst += cpy;
1257 len -= cpy;
1258 } while (len);
1259
1260 event = &event_copy;
1261 }
1262
1263 old += size;
1264
1265 switch (event->header.type) {
1266 case PERF_EVENT_IP:
1267 case PERF_EVENT_IP | __PERF_EVENT_TID:
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001268 process_event(event->ip.ip, md->counter);
1269 break;
1270
1271 case PERF_EVENT_MMAP:
1272 case PERF_EVENT_MUNMAP:
1273 printf("%s: %Lu %Lu %Lu %s\n",
1274 event->header.type == PERF_EVENT_MMAP
1275 ? "mmap" : "munmap",
1276 event->mmap.start,
1277 event->mmap.len,
1278 event->mmap.pgoff,
1279 event->mmap.filename);
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001280 break;
1281 }
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001282 }
1283
1284 md->prev = old;
1285}
1286
Ingo Molnare0143ba2009-03-23 21:29:59 +01001287int main(int argc, char *argv[])
1288{
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001289 struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001290 struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
Ingo Molnare0143ba2009-03-23 21:29:59 +01001291 struct perf_counter_hw_event hw_event;
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001292 int i, counter, group_fd, nr_poll = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001293 unsigned int cpu;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001294 int ret;
1295
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001296 page_size = sysconf(_SC_PAGE_SIZE);
1297
Ingo Molnare0143ba2009-03-23 21:29:59 +01001298 process_options(argc, argv);
1299
1300 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001301 assert(nr_cpus <= MAX_NR_CPUS);
1302 assert(nr_cpus >= 0);
1303
1304 if (run_perfstat)
1305 return do_perfstat(argc, argv);
1306
Ingo Molnare0143ba2009-03-23 21:29:59 +01001307 if (tid != -1 || profile_cpu != -1)
1308 nr_cpus = 1;
1309
Paul Mackerrascbe46552009-03-24 16:52:34 +11001310 parse_symbols();
1311 if (vmlinux && sym_filter_entry)
1312 parse_vmlinux(vmlinux);
1313
Ingo Molnare0143ba2009-03-23 21:29:59 +01001314 for (i = 0; i < nr_cpus; i++) {
1315 group_fd = -1;
1316 for (counter = 0; counter < nr_counters; counter++) {
1317
1318 cpu = profile_cpu;
1319 if (tid == -1 && profile_cpu == -1)
1320 cpu = i;
1321
1322 memset(&hw_event, 0, sizeof(hw_event));
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001323 hw_event.config = event_id[counter];
Ingo Molnare0143ba2009-03-23 21:29:59 +01001324 hw_event.irq_period = event_count[counter];
1325 hw_event.record_type = PERF_RECORD_IRQ;
1326 hw_event.nmi = nmi;
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001327 hw_event.include_tid = 1;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001328 hw_event.mmap = use_mmap;
1329 hw_event.munmap = use_munmap;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001330
Ingo Molnare0143ba2009-03-23 21:29:59 +01001331 fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001332 if (fd[i][counter] < 0) {
Paul Mackerrascbe46552009-03-24 16:52:34 +11001333 int err = errno;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001334 printf("kerneltop error: syscall returned with %d (%s)\n",
Paul Mackerrascbe46552009-03-24 16:52:34 +11001335 fd[i][counter], strerror(err));
1336 if (err == EPERM)
Ingo Molnare0143ba2009-03-23 21:29:59 +01001337 printf("Are you root?\n");
1338 exit(-1);
1339 }
1340 assert(fd[i][counter] >= 0);
Paul Mackerrascbe46552009-03-24 16:52:34 +11001341 fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001342
1343 /*
1344 * First counter acts as the group leader:
1345 */
1346 if (group && group_fd == -1)
1347 group_fd = fd[i][counter];
1348
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001349 event_array[nr_poll].fd = fd[i][counter];
1350 event_array[nr_poll].events = POLLIN;
1351 nr_poll++;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001352
1353 mmap_array[i][counter].counter = counter;
1354 mmap_array[i][counter].prev = 0;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001355 mmap_array[i][counter].mask = mmap_pages*page_size - 1;
1356 mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001357 PROT_READ, MAP_SHARED, fd[i][counter], 0);
1358 if (mmap_array[i][counter].base == MAP_FAILED) {
1359 printf("kerneltop error: failed to mmap with %d (%s)\n",
1360 errno, strerror(errno));
1361 exit(-1);
1362 }
Ingo Molnare0143ba2009-03-23 21:29:59 +01001363 }
1364 }
1365
Ingo Molnare0143ba2009-03-23 21:29:59 +01001366 printf("KernelTop refresh period: %d seconds\n", delay_secs);
1367 last_refresh = time(NULL);
1368
1369 while (1) {
1370 int hits = events;
1371
1372 for (i = 0; i < nr_cpus; i++) {
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001373 for (counter = 0; counter < nr_counters; counter++)
1374 mmap_read(&mmap_array[i][counter]);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001375 }
1376
1377 if (time(NULL) >= last_refresh + delay_secs) {
1378 print_sym_table();
1379 events = userspace_events = 0;
1380 }
1381
1382 if (hits == events)
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001383 ret = poll(event_array, nr_poll, 1000);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001384 hits = events;
1385 }
1386
1387 return 0;
1388}