blob: 15f3a5f90198982e853074d6a16b8305f63a6678 [file] [log] [blame]
Ingo Molnare0143ba2009-03-23 21:29:59 +01001/*
2 * kerneltop.c: show top kernel functions - performance counters showcase
3
4 Build with:
5
Paul Mackerrascbe46552009-03-24 16:52:34 +11006 cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
Ingo Molnare0143ba2009-03-23 21:29:59 +01007
8 Sample output:
9
10------------------------------------------------------------------------------
11 KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2)
12------------------------------------------------------------------------------
13
14 weight RIP kernel function
15 ______ ________________ _______________
16
17 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
18 33.00 - ffffffff804cb740 : sock_alloc_send_skb
19 31.26 - ffffffff804ce808 : skb_push
20 22.43 - ffffffff80510004 : tcp_established_options
21 19.00 - ffffffff8027d250 : find_get_page
22 15.76 - ffffffff804e4fc9 : eth_type_trans
23 15.20 - ffffffff804d8baa : dst_release
24 14.86 - ffffffff804cf5d8 : skb_release_head_state
25 14.00 - ffffffff802217d5 : read_hpet
26 12.00 - ffffffff804ffb7f : __ip_local_out
27 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
28 8.54 - ffffffff805001a3 : ip_queue_xmit
Ingo Molnare0143ba2009-03-23 21:29:59 +010029 */
Wu Fengguangf7524bd2009-03-20 10:08:06 +080030
31/*
32 * perfstat: /usr/bin/time -alike performance counter statistics utility
33
34 It summarizes the counter events of all tasks (and child tasks),
35 covering all CPUs that the command (or workload) executes on.
36 It only counts the per-task events of the workload started,
37 independent of how many other tasks run on those CPUs.
38
39 Sample output:
40
41 $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
42
43 Performance counter stats for 'ls':
44
45 163516953 instructions
46 2295 cache-misses
47 2855182 branch-misses
48 */
49
50 /*
51 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
52 *
53 * Improvements and fixes by:
54 *
55 * Arjan van de Ven <arjan@linux.intel.com>
56 * Yanmin Zhang <yanmin.zhang@intel.com>
57 * Wu Fengguang <fengguang.wu@intel.com>
58 * Mike Galbraith <efault@gmx.de>
Paul Mackerrascbe46552009-03-24 16:52:34 +110059 * Paul Mackerras <paulus@samba.org>
Wu Fengguangf7524bd2009-03-20 10:08:06 +080060 *
61 * Released under the GPL v2. (and only v2, not any later version)
62 */
63
Ingo Molnare0143ba2009-03-23 21:29:59 +010064#define _GNU_SOURCE
65#include <sys/types.h>
66#include <sys/stat.h>
67#include <sys/time.h>
68#include <unistd.h>
69#include <stdint.h>
70#include <stdlib.h>
71#include <string.h>
Paul Mackerrascbe46552009-03-24 16:52:34 +110072#include <limits.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010073#include <getopt.h>
74#include <assert.h>
75#include <fcntl.h>
76#include <stdio.h>
77#include <errno.h>
78#include <ctype.h>
79#include <time.h>
Mike Galbraith9dd49982009-03-27 12:13:43 +010080#include <sched.h>
81#include <pthread.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010082
Ingo Molnare0143ba2009-03-23 21:29:59 +010083#include <sys/syscall.h>
84#include <sys/ioctl.h>
85#include <sys/poll.h>
86#include <sys/prctl.h>
87#include <sys/wait.h>
88#include <sys/uio.h>
Peter Zijlstrabcbcb372009-03-23 18:22:12 +010089#include <sys/mman.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010090
91#include <linux/unistd.h>
Paul Mackerrascbe46552009-03-24 16:52:34 +110092#include <linux/types.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010093
Ingo Molnar383c5f82009-03-23 21:49:25 +010094#include "../../include/linux/perf_counter.h"
Ingo Molnare0143ba2009-03-23 21:29:59 +010095
Wu Fengguangf7524bd2009-03-20 10:08:06 +080096
Peter Zijlstra803d4f32009-03-23 18:22:11 +010097/*
98 * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
99 * counters in the current task.
100 */
#define PR_TASK_PERF_COUNTERS_DISABLE	31
#define PR_TASK_PERF_COUNTERS_ENABLE	32

/* Element count of a real array (not meaningful for pointers). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/*
 * rdclock(): read CLOCK_MONOTONIC and yield it in nanoseconds.
 * Implemented as a GCC statement expression so it can be used as a value.
 */
#define rdclock()						\
({								\
	struct timespec now_ts;					\
								\
	clock_gettime(CLOCK_MONOTONIC, &now_ts);		\
	now_ts.tv_sec * 1000000000ULL + now_ts.tv_nsec;		\
})

/*
 * Kernel annotations that have no meaning in user space:
 * define them away so kernel-style prototypes still compile.
 */
#define __user
#define asmlinkage
119
/*
 * Per-architecture syscall number for perf_counter_open plus the
 * read barrier (rmb) and busy-wait hint (cpu_relax) used by this tool.
 *
 * The macros deliberately expand WITHOUT a trailing semicolon so that
 * "if (x) cpu_relax(); else ..." parses as a single statement.
 */
#ifdef __x86_64__
#define __NR_perf_counter_open	295
#define rmb()		asm volatile("lfence" ::: "memory")
#define cpu_relax()	asm volatile("rep; nop" ::: "memory")
#endif

#ifdef __i386__
#define __NR_perf_counter_open	333
#define rmb()		asm volatile("lfence" ::: "memory")
#define cpu_relax()	asm volatile("rep; nop" ::: "memory")
#endif

#ifdef __powerpc__
#define __NR_perf_counter_open	319
#define rmb()		asm volatile ("sync" ::: "memory")
#define cpu_relax()	asm volatile ("" ::: "memory")
#endif
137
/* Branch-prediction hint: the condition is expected to be false. */
#define unlikely(x)	__builtin_expect(!!(x), 0)

/*
 * Type-checked minimum: each argument is evaluated exactly once, and the
 * pointer comparison makes the compiler warn when the types differ.
 */
#define min(x, y) ({				\
	typeof(x) _lhs = (x);			\
	typeof(y) _rhs = (y);			\
	(void) (&_lhs == &_rhs);		\
	_lhs < _rhs ? _lhs : _rhs; })
Peter Zijlstrabcbcb372009-03-23 18:22:12 +0100144
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100145asmlinkage int sys_perf_counter_open(
146 struct perf_counter_hw_event *hw_event_uptr __user,
147 pid_t pid,
148 int cpu,
149 int group_fd,
150 unsigned long flags)
151{
Paul Mackerrascbe46552009-03-24 16:52:34 +1100152 return syscall(
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100153 __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100154}
155
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800156#define MAX_COUNTERS 64
157#define MAX_NR_CPUS 256
158
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100159#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800160
161static int run_perfstat = 0;
162static int system_wide = 0;
163
164static int nr_counters = 0;
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100165static __u64 event_id[MAX_COUNTERS] = {
166 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
Paul Mackerrascbe46552009-03-24 16:52:34 +1100167 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100168 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
169 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
170
171 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
172 EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
173 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
174 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
175};
176static int default_interval = 100000;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800177static int event_count[MAX_COUNTERS];
178static int fd[MAX_NR_CPUS][MAX_COUNTERS];
Ingo Molnare0143ba2009-03-23 21:29:59 +0100179
Ingo Molnare0143ba2009-03-23 21:29:59 +0100180static __u64 count_filter = 100;
181
Ingo Molnare0143ba2009-03-23 21:29:59 +0100182static int tid = -1;
183static int profile_cpu = -1;
184static int nr_cpus = 0;
185static int nmi = 1;
Mike Galbraith9dd49982009-03-27 12:13:43 +0100186static unsigned int realtime_prio = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100187static int group = 0;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +0100188static unsigned int page_size;
Peter Zijlstra00f0ad72009-03-25 12:30:27 +0100189static unsigned int mmap_pages = 16;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +0200190static int use_mmap = 0;
191static int use_munmap = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100192
193static char *vmlinux;
194
195static char *sym_filter;
196static unsigned long filter_start;
197static unsigned long filter_end;
198
199static int delay_secs = 2;
200static int zero;
201static int dump_symtab;
202
Paul Mackerras31f004d2009-03-30 19:07:10 +0200203static int scale;
204
/*
 * One line of objdump output, kept in a singly linked list.
 */
struct source_line {
	uint64_t		EIP;	/* address parsed from the line, 0 if none */
	unsigned long		count;	/* hits recorded for this address */
	char			*line;	/* text of the line (heap allocated) */
	struct source_line	*next;	/* following line, NULL at the end */
};

/* Head of the list, and the link where the next node gets appended. */
static struct source_line	*lines;
static struct source_line	**lines_tail;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800214
/* Table of default sample periods (six entries). */
const unsigned int default_count[] = {
	1000000, 1000000,
	  10000,   10000,
	1000000,   10000,
};
223
/* Display names for hardware events, indexed by event id. */
static char *hw_event_names[] = {
	"CPU cycles",		"instructions",
	"cache references",	"cache misses",
	"branches",		"branch misses",
	"bus cycles",
};

/* Display names for software events, indexed by event id. */
static char *sw_event_names[] = {
	"cpu clock ticks",	"task clock ticks",
	"pagefaults",		"context switches",
	"CPU migrations",	"minor faults",
	"major faults",
};
243
244struct event_symbol {
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100245 __u64 event;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800246 char *symbol;
247};
248
249static struct event_symbol event_symbols[] = {
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100250 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
251 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
252 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
253 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
254 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
255 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
256 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
257 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
258 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },
259
260 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
261 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
262 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
263 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
264 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
265 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
266 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
267 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
268 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
269 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800270};
271
/*
 * Extract one bit-field from a counter config value: mask with
 * PERF_COUNTER_<name>_MASK, then shift down by PERF_COUNTER_<name>_SHIFT.
 * The argument is parenthesized so expression arguments (e.g. "a | b")
 * are masked as a whole.
 */
#define __PERF_COUNTER_FIELD(config, name) \
	(((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
279
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800280static void display_events_help(void)
281{
282 unsigned int i;
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100283 __u64 e;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800284
285 printf(
286 " -e EVENT --event=EVENT # symbolic-name abbreviations");
287
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100288 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
289 int type, id;
290
291 e = event_symbols[i].event;
292 type = PERF_COUNTER_TYPE(e);
293 id = PERF_COUNTER_ID(e);
294
295 printf("\n %d:%d: %-20s",
296 type, id, event_symbols[i].symbol);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800297 }
298
299 printf("\n"
300 " rNNN: raw PMU events (eventsel+umask)\n\n");
301}
302
303static void display_perfstat_help(void)
304{
305 printf(
306 "Usage: perfstat [<events...>] <cmd...>\n\n"
307 "PerfStat Options (up to %d event types can be specified):\n\n",
308 MAX_COUNTERS);
309
310 display_events_help();
311
312 printf(
Paul Mackerras31f004d2009-03-30 19:07:10 +0200313 " -l # scale counter values\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800314 " -a # system-wide collection\n");
315 exit(0);
316}
Ingo Molnare0143ba2009-03-23 21:29:59 +0100317
318static void display_help(void)
319{
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800320 if (run_perfstat)
321 return display_perfstat_help();
322
Ingo Molnare0143ba2009-03-23 21:29:59 +0100323 printf(
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800324 "Usage: kerneltop [<options>]\n"
325 " Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100326 "KernelTop Options (up to %d event types can be specified at once):\n\n",
327 MAX_COUNTERS);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800328
329 display_events_help();
330
Ingo Molnare0143ba2009-03-23 21:29:59 +0100331 printf(
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800332 " -S --stat # perfstat COMMAND\n"
333 " -a # system-wide collection (for perfstat)\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100334 " -c CNT --count=CNT # event period to sample\n\n"
335 " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n"
336 " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n"
Paul Mackerras31f004d2009-03-30 19:07:10 +0200337 " -l # show scale factor for RR events\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100338 " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800339 " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n"
Mike Galbraith9dd49982009-03-27 12:13:43 +0100340 " -r prio --realtime=<prio> # event acquisition runs with SCHED_FIFO policy\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100341 " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800342 " -x path --vmlinux=<path> # the vmlinux binary, required for -s use\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100343 " -z --zero # zero counts after display\n"
344 " -D --dump_symtab # dump symbol table to stderr on startup\n"
Peter Zijlstra4c4ba212009-03-25 12:30:26 +0100345 " -m pages --mmap_pages=<pages> # number of mmap data pages\n"
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +0200346 " -M --mmap_info # print mmap info stream\n"
347 " -U --munmap_info # print munmap info stream\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800348 );
Ingo Molnare0143ba2009-03-23 21:29:59 +0100349
350 exit(0);
351}
352
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800353static char *event_name(int ctr)
354{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100355 __u64 config = event_id[ctr];
356 int type = PERF_COUNTER_TYPE(config);
357 int id = PERF_COUNTER_ID(config);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800358 static char buf[32];
359
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100360 if (PERF_COUNTER_RAW(config)) {
361 sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800362 return buf;
363 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800364
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100365 switch (type) {
366 case PERF_TYPE_HARDWARE:
367 if (id < PERF_HW_EVENTS_MAX)
368 return hw_event_names[id];
369 return "unknown-hardware";
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800370
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100371 case PERF_TYPE_SOFTWARE:
372 if (id < PERF_SW_EVENTS_MAX)
373 return sw_event_names[id];
374 return "unknown-software";
375
376 default:
377 break;
378 }
379
380 return "unknown";
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800381}
382
383/*
384 * Each event can have multiple symbolic names.
385 * Symbolic names are (almost) exactly matched.
386 */
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100387static __u64 match_event_symbols(char *str)
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800388{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100389 __u64 config, id;
390 int type;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800391 unsigned int i;
392
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100393 if (sscanf(str, "r%llx", &config) == 1)
394 return config | PERF_COUNTER_RAW_MASK;
395
396 if (sscanf(str, "%d:%llu", &type, &id) == 2)
397 return EID(type, id);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800398
399 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
400 if (!strncmp(str, event_symbols[i].symbol,
401 strlen(event_symbols[i].symbol)))
402 return event_symbols[i].event;
403 }
404
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100405 return ~0ULL;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800406}
407
408static int parse_events(char *str)
409{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100410 __u64 config;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800411
412again:
413 if (nr_counters == MAX_COUNTERS)
414 return -1;
415
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100416 config = match_event_symbols(str);
417 if (config == ~0ULL)
418 return -1;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800419
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100420 event_id[nr_counters] = config;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800421 nr_counters++;
422
423 str = strstr(str, ",");
424 if (str) {
425 str++;
426 goto again;
427 }
428
429 return 0;
430}
431
432
433/*
434 * perfstat
435 */
436
437char fault_here[1000000];
438
439static void create_perfstat_counter(int counter)
440{
441 struct perf_counter_hw_event hw_event;
442
443 memset(&hw_event, 0, sizeof(hw_event));
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100444 hw_event.config = event_id[counter];
Peter Zijlstra3df70fd2009-04-02 11:12:02 +0200445 hw_event.record_type = 0;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800446 hw_event.nmi = 0;
Paul Mackerras31f004d2009-03-30 19:07:10 +0200447 if (scale)
448 hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
449 PERF_FORMAT_TOTAL_TIME_RUNNING;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800450
451 if (system_wide) {
452 int cpu;
453 for (cpu = 0; cpu < nr_cpus; cpu ++) {
454 fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
455 if (fd[cpu][counter] < 0) {
456 printf("perfstat error: syscall returned with %d (%s)\n",
457 fd[cpu][counter], strerror(errno));
458 exit(-1);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100459 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800460 }
461 } else {
462 hw_event.inherit = 1;
463 hw_event.disabled = 1;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100464
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800465 fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
466 if (fd[0][counter] < 0) {
467 printf("perfstat error: syscall returned with %d (%s)\n",
468 fd[0][counter], strerror(errno));
469 exit(-1);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100470 }
471 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800472}
Ingo Molnare0143ba2009-03-23 21:29:59 +0100473
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800474int do_perfstat(int argc, char *argv[])
475{
476 unsigned long long t0, t1;
477 int counter;
478 ssize_t res;
479 int status;
480 int pid;
481
482 if (!system_wide)
483 nr_cpus = 1;
484
485 for (counter = 0; counter < nr_counters; counter++)
486 create_perfstat_counter(counter);
487
488 argc -= optind;
489 argv += optind;
490
Wu Fengguangaf9522c2009-03-20 10:08:10 +0800491 if (!argc)
492 display_help();
493
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800494 /*
495 * Enable counters and exec the command:
496 */
497 t0 = rdclock();
498 prctl(PR_TASK_PERF_COUNTERS_ENABLE);
499
500 if ((pid = fork()) < 0)
501 perror("failed to fork");
502 if (!pid) {
503 if (execvp(argv[0], argv)) {
504 perror(argv[0]);
505 exit(-1);
506 }
Wu Fengguang95bb3be2009-03-20 10:08:04 +0800507 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800508 while (wait(&status) >= 0)
509 ;
510 prctl(PR_TASK_PERF_COUNTERS_DISABLE);
511 t1 = rdclock();
512
513 fflush(stdout);
514
515 fprintf(stderr, "\n");
516 fprintf(stderr, " Performance counter stats for \'%s\':\n",
517 argv[0]);
518 fprintf(stderr, "\n");
Ingo Molnare0143ba2009-03-23 21:29:59 +0100519
520 for (counter = 0; counter < nr_counters; counter++) {
Paul Mackerras31f004d2009-03-30 19:07:10 +0200521 int cpu, nv;
522 __u64 count[3], single_count[3];
523 int scaled;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100524
Paul Mackerras31f004d2009-03-30 19:07:10 +0200525 count[0] = count[1] = count[2] = 0;
526 nv = scale ? 3 : 1;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800527 for (cpu = 0; cpu < nr_cpus; cpu ++) {
528 res = read(fd[cpu][counter],
Paul Mackerras31f004d2009-03-30 19:07:10 +0200529 single_count, nv * sizeof(__u64));
530 assert(res == nv * sizeof(__u64));
531
532 count[0] += single_count[0];
533 if (scale) {
534 count[1] += single_count[1];
535 count[2] += single_count[2];
536 }
537 }
538
539 scaled = 0;
540 if (scale) {
541 if (count[2] == 0) {
542 fprintf(stderr, " %14s %-20s\n",
543 "<not counted>", event_name(counter));
544 continue;
545 }
546 if (count[2] < count[1]) {
547 scaled = 1;
548 count[0] = (unsigned long long)
549 ((double)count[0] * count[1] / count[2] + 0.5);
550 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800551 }
552
Paul Mackerrascbe46552009-03-24 16:52:34 +1100553 if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
554 event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800555
Paul Mackerras31f004d2009-03-30 19:07:10 +0200556 double msecs = (double)count[0] / 1000000;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800557
Paul Mackerras31f004d2009-03-30 19:07:10 +0200558 fprintf(stderr, " %14.6f %-20s (msecs)",
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800559 msecs, event_name(counter));
560 } else {
Paul Mackerras31f004d2009-03-30 19:07:10 +0200561 fprintf(stderr, " %14Ld %-20s (events)",
562 count[0], event_name(counter));
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800563 }
Paul Mackerras31f004d2009-03-30 19:07:10 +0200564 if (scaled)
565 fprintf(stderr, " (scaled from %.2f%%)",
566 (double) count[2] / count[1] * 100);
567 fprintf(stderr, "\n");
Ingo Molnare0143ba2009-03-23 21:29:59 +0100568 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800569 fprintf(stderr, "\n");
570 fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
571 (double)(t1-t0)/1e6);
572 fprintf(stderr, "\n");
573
574 return 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100575}
576
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800577/*
578 * Symbols
579 */
580
Ingo Molnare0143ba2009-03-23 21:29:59 +0100581static uint64_t min_ip;
582static uint64_t max_ip = -1ll;
583
584struct sym_entry {
585 unsigned long long addr;
586 char *sym;
587 unsigned long count[MAX_COUNTERS];
588 int skip;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100589 struct source_line *source;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100590};
591
592#define MAX_SYMS 100000
593
594static int sym_table_count;
595
596struct sym_entry *sym_filter_entry;
597
598static struct sym_entry sym_table[MAX_SYMS];
599
600static void show_details(struct sym_entry *sym);
601
602/*
Wu Fengguangef45fa92009-03-20 10:08:07 +0800603 * Ordering weight: count-1 * count-2 * ... / count-n
Ingo Molnare0143ba2009-03-23 21:29:59 +0100604 */
605static double sym_weight(const struct sym_entry *sym)
606{
607 double weight;
608 int counter;
609
610 weight = sym->count[0];
611
612 for (counter = 1; counter < nr_counters-1; counter++)
613 weight *= sym->count[counter];
614
615 weight /= (sym->count[counter] + 1);
616
617 return weight;
618}
619
/*
 * qsort() comparator ordering symbols by descending weight.
 *
 * Returns a positive value when sym1 is lighter than sym2 (sym1 sorts
 * after sym2), a negative value when it is heavier, and 0 when the
 * weights are equal - the full three-way result qsort() requires.
 */
static int compare(const void *__sym1, const void *__sym2)
{
	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
	double weight1 = sym_weight(sym1);
	double weight2 = sym_weight(sym2);

	if (weight1 < weight2)
		return 1;
	if (weight1 > weight2)
		return -1;
	return 0;
}
626
Ingo Molnare0143ba2009-03-23 21:29:59 +0100627static long events;
628static long userspace_events;
629static const char CONSOLE_CLEAR[] = "";
630
631static struct sym_entry tmp[MAX_SYMS];
632
633static void print_sym_table(void)
634{
635 int i, printed;
636 int counter;
637 float events_per_sec = events/delay_secs;
638 float kevents_per_sec = (events-userspace_events)/delay_secs;
Mike Galbraith6278af62009-04-02 10:40:28 +0200639 float sum_kevents = 0.0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100640
Mike Galbraith9dd49982009-03-27 12:13:43 +0100641 events = userspace_events = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100642 memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count);
643 qsort(tmp, sym_table_count, sizeof(tmp[0]), compare);
644
Mike Galbraith6278af62009-04-02 10:40:28 +0200645 for (i = 0; i < sym_table_count && tmp[i].count[0]; i++)
646 sum_kevents += tmp[i].count[0];
647
Ingo Molnare0143ba2009-03-23 21:29:59 +0100648 write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
649
650 printf(
651"------------------------------------------------------------------------------\n");
Mike Galbraith6278af62009-04-02 10:40:28 +0200652 printf( " KernelTop:%8.0f irqs/sec kernel:%4.1f%% [%s, ",
Ingo Molnare0143ba2009-03-23 21:29:59 +0100653 events_per_sec,
654 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)),
655 nmi ? "NMI" : "IRQ");
656
657 if (nr_counters == 1)
658 printf("%d ", event_count[0]);
659
660 for (counter = 0; counter < nr_counters; counter++) {
661 if (counter)
662 printf("/");
663
Wu Fengguange3908612009-03-20 10:08:05 +0800664 printf("%s", event_name(counter));
Ingo Molnare0143ba2009-03-23 21:29:59 +0100665 }
666
667 printf( "], ");
668
669 if (tid != -1)
670 printf(" (tid: %d", tid);
671 else
672 printf(" (all");
673
674 if (profile_cpu != -1)
675 printf(", cpu: %d)\n", profile_cpu);
676 else {
677 if (tid != -1)
678 printf(")\n");
679 else
680 printf(", %d CPUs)\n", nr_cpus);
681 }
682
683 printf("------------------------------------------------------------------------------\n\n");
684
685 if (nr_counters == 1)
Mike Galbraith6278af62009-04-02 10:40:28 +0200686 printf(" events pcnt");
Ingo Molnare0143ba2009-03-23 21:29:59 +0100687 else
Mike Galbraith6278af62009-04-02 10:40:28 +0200688 printf(" weight events pcnt");
Ingo Molnare0143ba2009-03-23 21:29:59 +0100689
690 printf(" RIP kernel function\n"
Mike Galbraith6278af62009-04-02 10:40:28 +0200691 " ______ ______ _____ ________________ _______________\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100692 );
693
Mike Galbraith6278af62009-04-02 10:40:28 +0200694 for (i = 0, printed = 0; i < sym_table_count; i++) {
695 float pcnt;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100696 int count;
697
Mike Galbraith6278af62009-04-02 10:40:28 +0200698 if (printed <= 18 && tmp[i].count[0] >= count_filter) {
699 pcnt = 100.0 - (100.0*((sum_kevents-tmp[i].count[0])/sum_kevents));
700
701 if (nr_counters == 1)
702 printf("%19.2f - %4.1f%% - %016llx : %s\n",
703 sym_weight(tmp + i),
704 pcnt, tmp[i].addr, tmp[i].sym);
705 else
706 printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n",
707 sym_weight(tmp + i),
708 tmp[i].count[0],
709 pcnt, tmp[i].addr, tmp[i].sym);
710 printed++;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100711 }
712 /*
713 * Add decay to the counts:
714 */
715 for (count = 0; count < nr_counters; count++)
716 sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8;
717 }
718
719 if (sym_filter_entry)
720 show_details(sym_filter_entry);
721
Ingo Molnare0143ba2009-03-23 21:29:59 +0100722 {
723 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
724
725 if (poll(&stdin_poll, 1, 0) == 1) {
726 printf("key pressed - exiting.\n");
727 exit(0);
728 }
729 }
730}
731
Mike Galbraith9dd49982009-03-27 12:13:43 +0100732static void *display_thread(void *arg)
733{
734 printf("KernelTop refresh period: %d seconds\n", delay_secs);
735
736 while (!sleep(delay_secs))
737 print_sym_table();
738
739 return NULL;
740}
741
Ingo Molnare0143ba2009-03-23 21:29:59 +0100742static int read_symbol(FILE *in, struct sym_entry *s)
743{
744 static int filter_match = 0;
745 char *sym, stype;
746 char str[500];
747 int rc, pos;
748
749 rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str);
750 if (rc == EOF)
751 return -1;
752
753 assert(rc == 3);
754
755 /* skip until end of line: */
756 pos = strlen(str);
757 do {
758 rc = fgetc(in);
759 if (rc == '\n' || rc == EOF || pos >= 499)
760 break;
761 str[pos] = rc;
762 pos++;
763 } while (1);
764 str[pos] = 0;
765
766 sym = str;
767
768 /* Filter out known duplicates and non-text symbols. */
769 if (!strcmp(sym, "_text"))
770 return 1;
771 if (!min_ip && !strcmp(sym, "_stext"))
772 return 1;
773 if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext"))
774 return 1;
775 if (stype != 'T' && stype != 't')
776 return 1;
777 if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14))
778 return 1;
779 if (strstr(sym, "_text_start") || strstr(sym, "_text_end"))
780 return 1;
781
782 s->sym = malloc(strlen(str));
783 assert(s->sym);
784
785 strcpy((char *)s->sym, str);
786 s->skip = 0;
787
788 /* Tag events to be skipped. */
789 if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
790 s->skip = 1;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +0100791 else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
792 s->skip = 1;
793 else if (!strcmp("mwait_idle", s->sym))
Ingo Molnare0143ba2009-03-23 21:29:59 +0100794 s->skip = 1;
795
796 if (filter_match == 1) {
797 filter_end = s->addr;
798 filter_match = -1;
799 if (filter_end - filter_start > 10000) {
800 printf("hm, too large filter symbol <%s> - skipping.\n",
801 sym_filter);
802 printf("symbol filter start: %016lx\n", filter_start);
803 printf(" end: %016lx\n", filter_end);
804 filter_end = filter_start = 0;
805 sym_filter = NULL;
806 sleep(1);
807 }
808 }
809 if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) {
810 filter_match = 1;
811 filter_start = s->addr;
812 }
813
814 return 0;
815}
816
817int compare_addr(const void *__sym1, const void *__sym2)
818{
819 const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
820
821 return sym1->addr > sym2->addr;
822}
823
824static void sort_symbol_table(void)
825{
826 int i, dups;
827
828 do {
829 qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr);
830 for (i = 0, dups = 0; i < sym_table_count; i++) {
831 if (sym_table[i].addr == sym_table[i+1].addr) {
832 sym_table[i+1].addr = -1ll;
833 dups++;
834 }
835 }
836 sym_table_count -= dups;
837 } while(dups);
838}
839
840static void parse_symbols(void)
841{
842 struct sym_entry *last;
843
844 FILE *kallsyms = fopen("/proc/kallsyms", "r");
845
846 if (!kallsyms) {
847 printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n");
848 exit(-1);
849 }
850
851 while (!feof(kallsyms)) {
852 if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) {
853 sym_table_count++;
854 assert(sym_table_count <= MAX_SYMS);
855 }
856 }
857
858 sort_symbol_table();
859 min_ip = sym_table[0].addr;
860 max_ip = sym_table[sym_table_count-1].addr;
861 last = sym_table + sym_table_count++;
862
863 last->addr = -1ll;
864 last->sym = "<end>";
865
866 if (filter_end) {
867 int count;
868 for (count=0; count < sym_table_count; count ++) {
869 if (!strcmp(sym_table[count].sym, sym_filter)) {
870 sym_filter_entry = &sym_table[count];
871 break;
872 }
873 }
874 }
875 if (dump_symtab) {
876 int i;
877
878 for (i = 0; i < sym_table_count; i++)
879 fprintf(stderr, "%llx %s\n",
880 sym_table[i].addr, sym_table[i].sym);
881 }
882}
883
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800884/*
885 * Source lines
886 */
Ingo Molnare0143ba2009-03-23 21:29:59 +0100887
888static void parse_vmlinux(char *filename)
889{
890 FILE *file;
891 char command[PATH_MAX*2];
892 if (!filename)
893 return;
894
895 sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename);
896
897 file = popen(command, "r");
898 if (!file)
899 return;
900
Paul Mackerrascbe46552009-03-24 16:52:34 +1100901 lines_tail = &lines;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100902 while (!feof(file)) {
903 struct source_line *src;
904 size_t dummy = 0;
905 char *c;
906
907 src = malloc(sizeof(struct source_line));
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800908 assert(src != NULL);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100909 memset(src, 0, sizeof(struct source_line));
910
911 if (getline(&src->line, &dummy, file) < 0)
912 break;
913 if (!src->line)
914 break;
915
916 c = strchr(src->line, '\n');
917 if (c)
918 *c = 0;
919
Paul Mackerrascbe46552009-03-24 16:52:34 +1100920 src->next = NULL;
921 *lines_tail = src;
922 lines_tail = &src->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100923
924 if (strlen(src->line)>8 && src->line[8] == ':')
925 src->EIP = strtoull(src->line, NULL, 16);
926 if (strlen(src->line)>8 && src->line[16] == ':')
927 src->EIP = strtoull(src->line, NULL, 16);
928 }
929 pclose(file);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100930}
931
932static void record_precise_ip(uint64_t ip)
933{
934 struct source_line *line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100935
Paul Mackerrascbe46552009-03-24 16:52:34 +1100936 for (line = lines; line; line = line->next) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100937 if (line->EIP == ip)
938 line->count++;
939 if (line->EIP > ip)
940 break;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100941 }
942}
943
944static void lookup_sym_in_vmlinux(struct sym_entry *sym)
945{
946 struct source_line *line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100947 char pattern[PATH_MAX];
948 sprintf(pattern, "<%s>:", sym->sym);
949
Paul Mackerrascbe46552009-03-24 16:52:34 +1100950 for (line = lines; line; line = line->next) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100951 if (strstr(line->line, pattern)) {
Paul Mackerrascbe46552009-03-24 16:52:34 +1100952 sym->source = line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100953 break;
954 }
Ingo Molnare0143ba2009-03-23 21:29:59 +0100955 }
956}
957
Paul Mackerrascbe46552009-03-24 16:52:34 +1100958static void show_lines(struct source_line *line_queue, int line_queue_count)
Ingo Molnare0143ba2009-03-23 21:29:59 +0100959{
960 int i;
961 struct source_line *line;
962
Paul Mackerrascbe46552009-03-24 16:52:34 +1100963 line = line_queue;
964 for (i = 0; i < line_queue_count; i++) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100965 printf("%8li\t%s\n", line->count, line->line);
Paul Mackerrascbe46552009-03-24 16:52:34 +1100966 line = line->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100967 }
968}
969
970#define TRACE_COUNT 3
971
972static void show_details(struct sym_entry *sym)
973{
974 struct source_line *line;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100975 struct source_line *line_queue = NULL;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100976 int displayed = 0;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100977 int line_queue_count = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100978
979 if (!sym->source)
980 lookup_sym_in_vmlinux(sym);
981 if (!sym->source)
982 return;
983
984 printf("Showing details for %s\n", sym->sym);
985
Paul Mackerrascbe46552009-03-24 16:52:34 +1100986 line = sym->source;
987 while (line) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100988 if (displayed && strstr(line->line, ">:"))
989 break;
990
Paul Mackerrascbe46552009-03-24 16:52:34 +1100991 if (!line_queue_count)
992 line_queue = line;
993 line_queue_count ++;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100994
995 if (line->count >= count_filter) {
Paul Mackerrascbe46552009-03-24 16:52:34 +1100996 show_lines(line_queue, line_queue_count);
997 line_queue_count = 0;
998 line_queue = NULL;
999 } else if (line_queue_count > TRACE_COUNT) {
1000 line_queue = line_queue->next;
1001 line_queue_count --;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001002 }
1003
1004 line->count = 0;
1005 displayed++;
1006 if (displayed > 300)
1007 break;
Paul Mackerrascbe46552009-03-24 16:52:34 +11001008 line = line->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001009 }
1010}
1011
1012/*
1013 * Binary search in the histogram table and record the hit:
1014 */
1015static void record_ip(uint64_t ip, int counter)
1016{
1017 int left_idx, middle_idx, right_idx, idx;
1018 unsigned long left, middle, right;
1019
1020 record_precise_ip(ip);
1021
1022 left_idx = 0;
1023 right_idx = sym_table_count-1;
1024 assert(ip <= max_ip && ip >= min_ip);
1025
1026 while (left_idx + 1 < right_idx) {
1027 middle_idx = (left_idx + right_idx) / 2;
1028
1029 left = sym_table[ left_idx].addr;
1030 middle = sym_table[middle_idx].addr;
1031 right = sym_table[ right_idx].addr;
1032
1033 if (!(left <= middle && middle <= right)) {
1034 printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right);
1035 printf("%d %d %d\n", left_idx, middle_idx, right_idx);
1036 }
1037 assert(left <= middle && middle <= right);
1038 if (!(left <= ip && ip <= right)) {
1039 printf(" left: %016lx\n", left);
Ingo Molnar193e8df2009-03-23 22:23:16 +01001040 printf(" ip: %016lx\n", (unsigned long)ip);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001041 printf("right: %016lx\n", right);
1042 }
1043 assert(left <= ip && ip <= right);
1044 /*
1045 * [ left .... target .... middle .... right ]
1046 * => right := middle
1047 */
1048 if (ip < middle) {
1049 right_idx = middle_idx;
1050 continue;
1051 }
1052 /*
1053 * [ left .... middle ... target ... right ]
1054 * => left := middle
1055 */
1056 left_idx = middle_idx;
1057 }
1058
1059 idx = left_idx;
1060
1061 if (!sym_table[idx].skip)
1062 sym_table[idx].count[counter]++;
1063 else events--;
1064}
1065
1066static void process_event(uint64_t ip, int counter)
1067{
1068 events++;
1069
1070 if (ip < min_ip || ip > max_ip) {
1071 userspace_events++;
1072 return;
1073 }
1074
1075 record_ip(ip, counter);
1076}
1077
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001078static void process_options(int argc, char *argv[])
1079{
1080 int error = 0, counter;
1081
1082 if (strstr(argv[0], "perfstat"))
1083 run_perfstat = 1;
1084
1085 for (;;) {
1086 int option_index = 0;
1087 /** Options for getopt */
1088 static struct option long_options[] = {
1089 {"count", required_argument, NULL, 'c'},
1090 {"cpu", required_argument, NULL, 'C'},
1091 {"delay", required_argument, NULL, 'd'},
1092 {"dump_symtab", no_argument, NULL, 'D'},
1093 {"event", required_argument, NULL, 'e'},
1094 {"filter", required_argument, NULL, 'f'},
1095 {"group", required_argument, NULL, 'g'},
1096 {"help", no_argument, NULL, 'h'},
1097 {"nmi", required_argument, NULL, 'n'},
Mike Galbraith9dd49982009-03-27 12:13:43 +01001098 {"mmap_info", no_argument, NULL, 'M'},
1099 {"mmap_pages", required_argument, NULL, 'm'},
1100 {"munmap_info", no_argument, NULL, 'U'},
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001101 {"pid", required_argument, NULL, 'p'},
Mike Galbraith9dd49982009-03-27 12:13:43 +01001102 {"realtime", required_argument, NULL, 'r'},
1103 {"scale", no_argument, NULL, 'l'},
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001104 {"symbol", required_argument, NULL, 's'},
1105 {"stat", no_argument, NULL, 'S'},
Mike Galbraith9dd49982009-03-27 12:13:43 +01001106 {"vmlinux", required_argument, NULL, 'x'},
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001107 {"zero", no_argument, NULL, 'z'},
1108 {NULL, 0, NULL, 0 }
1109 };
Mike Galbraith9dd49982009-03-27 12:13:43 +01001110 int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001111 long_options, &option_index);
1112 if (c == -1)
1113 break;
1114
1115 switch (c) {
1116 case 'a': system_wide = 1; break;
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001117 case 'c': default_interval = atoi(optarg); break;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001118 case 'C':
1119 /* CPU and PID are mutually exclusive */
1120 if (tid != -1) {
1121 printf("WARNING: CPU switch overriding PID\n");
1122 sleep(1);
1123 tid = -1;
1124 }
1125 profile_cpu = atoi(optarg); break;
1126 case 'd': delay_secs = atoi(optarg); break;
1127 case 'D': dump_symtab = 1; break;
1128
1129 case 'e': error = parse_events(optarg); break;
1130
1131 case 'f': count_filter = atoi(optarg); break;
1132 case 'g': group = atoi(optarg); break;
1133 case 'h': display_help(); break;
Paul Mackerras31f004d2009-03-30 19:07:10 +02001134 case 'l': scale = 1; break;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001135 case 'n': nmi = atoi(optarg); break;
1136 case 'p':
1137 /* CPU and PID are mutually exclusive */
1138 if (profile_cpu != -1) {
1139 printf("WARNING: PID switch overriding CPU\n");
1140 sleep(1);
1141 profile_cpu = -1;
1142 }
1143 tid = atoi(optarg); break;
Mike Galbraith9dd49982009-03-27 12:13:43 +01001144 case 'r': realtime_prio = atoi(optarg); break;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001145 case 's': sym_filter = strdup(optarg); break;
1146 case 'S': run_perfstat = 1; break;
1147 case 'x': vmlinux = strdup(optarg); break;
1148 case 'z': zero = 1; break;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001149 case 'm': mmap_pages = atoi(optarg); break;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001150 case 'M': use_mmap = 1; break;
1151 case 'U': use_munmap = 1; break;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001152 default: error = 1; break;
1153 }
1154 }
1155 if (error)
1156 display_help();
1157
1158 if (!nr_counters) {
1159 if (run_perfstat)
1160 nr_counters = 8;
1161 else {
1162 nr_counters = 1;
1163 event_id[0] = 0;
1164 }
1165 }
1166
1167 for (counter = 0; counter < nr_counters; counter++) {
1168 if (event_count[counter])
1169 continue;
1170
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001171 event_count[counter] = default_interval;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001172 }
1173}
1174
/*
 * Reader-side state for one mmap()ed counter buffer.
 */
struct mmap_data {
	int		counter;	/* counter index passed on to process_event() */
	void		*base;		/* start of the mapping; data begins one page in */
	unsigned int	mask;		/* data area size minus one, used to wrap offsets */
	unsigned int	prev;		/* position up to which data was already consumed */
};
1181
1182static unsigned int mmap_read_head(struct mmap_data *md)
1183{
1184 struct perf_counter_mmap_page *pc = md->base;
Peter Zijlstra19556432009-03-30 19:07:04 +02001185 int head;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001186
1187 head = pc->data_head;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001188 rmb();
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001189
1190 return head;
1191}
1192
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001193struct timeval last_read, this_read;
1194
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001195static void mmap_read(struct mmap_data *md)
1196{
1197 unsigned int head = mmap_read_head(md);
1198 unsigned int old = md->prev;
1199 unsigned char *data = md->base + page_size;
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001200 int diff;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001201
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001202 gettimeofday(&this_read, NULL);
1203
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001204 /*
1205 * If we're further behind than half the buffer, there's a chance
1206 * the writer will bite our tail and screw up the events under us.
1207 *
1208 * If we somehow ended up ahead of the head, we got messed up.
1209 *
1210 * In either case, truncate and restart at head.
1211 */
1212 diff = head - old;
1213 if (diff > md->mask / 2 || diff < 0) {
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001214 struct timeval iv;
1215 unsigned long msecs;
1216
1217 timersub(&this_read, &last_read, &iv);
1218 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
1219
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001220 fprintf(stderr, "WARNING: failed to keep up with mmap data."
1221 " Last read %lu msecs ago.\n", msecs);
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001222
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001223 /*
1224 * head points to a known good entry, start there.
1225 */
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001226 old = head;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001227 }
1228
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001229 last_read = this_read;
1230
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001231 for (; old != head;) {
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001232 struct ip_event {
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001233 struct perf_event_header header;
1234 __u64 ip;
1235 __u32 pid, tid;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001236 };
1237 struct mmap_event {
1238 struct perf_event_header header;
1239 __u32 pid, tid;
1240 __u64 start;
1241 __u64 len;
1242 __u64 pgoff;
1243 char filename[PATH_MAX];
1244 };
1245
1246 typedef union event_union {
1247 struct perf_event_header header;
1248 struct ip_event ip;
1249 struct mmap_event mmap;
1250 } event_t;
1251
1252 event_t *event = (event_t *)&data[old & md->mask];
1253
1254 event_t event_copy;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001255
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001256 unsigned int size = event->header.size;
1257
1258 /*
1259 * Event straddles the mmap boundary -- header should always
1260 * be inside due to u64 alignment of output.
1261 */
1262 if ((old & md->mask) + size != ((old + size) & md->mask)) {
1263 unsigned int offset = old;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001264 unsigned int len = min(sizeof(*event), size), cpy;
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001265 void *dst = &event_copy;
1266
1267 do {
1268 cpy = min(md->mask + 1 - (offset & md->mask), len);
1269 memcpy(dst, &data[offset & md->mask], cpy);
1270 offset += cpy;
1271 dst += cpy;
1272 len -= cpy;
1273 } while (len);
1274
1275 event = &event_copy;
1276 }
1277
1278 old += size;
1279
1280 switch (event->header.type) {
Peter Zijlstra3df70fd2009-04-02 11:12:02 +02001281 case PERF_EVENT_COUNTER_OVERFLOW | __PERF_EVENT_IP:
1282 case PERF_EVENT_COUNTER_OVERFLOW | __PERF_EVENT_IP | __PERF_EVENT_TID:
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001283 process_event(event->ip.ip, md->counter);
1284 break;
1285
1286 case PERF_EVENT_MMAP:
1287 case PERF_EVENT_MUNMAP:
1288 printf("%s: %Lu %Lu %Lu %s\n",
1289 event->header.type == PERF_EVENT_MMAP
1290 ? "mmap" : "munmap",
1291 event->mmap.start,
1292 event->mmap.len,
1293 event->mmap.pgoff,
1294 event->mmap.filename);
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001295 break;
1296 }
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001297 }
1298
1299 md->prev = old;
1300}
1301
Ingo Molnare0143ba2009-03-23 21:29:59 +01001302int main(int argc, char *argv[])
1303{
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001304 struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001305 struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
Ingo Molnare0143ba2009-03-23 21:29:59 +01001306 struct perf_counter_hw_event hw_event;
Mike Galbraith9dd49982009-03-27 12:13:43 +01001307 pthread_t thread;
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001308 int i, counter, group_fd, nr_poll = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001309 unsigned int cpu;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001310 int ret;
1311
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001312 page_size = sysconf(_SC_PAGE_SIZE);
1313
Ingo Molnare0143ba2009-03-23 21:29:59 +01001314 process_options(argc, argv);
1315
1316 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001317 assert(nr_cpus <= MAX_NR_CPUS);
1318 assert(nr_cpus >= 0);
1319
1320 if (run_perfstat)
1321 return do_perfstat(argc, argv);
1322
Ingo Molnare0143ba2009-03-23 21:29:59 +01001323 if (tid != -1 || profile_cpu != -1)
1324 nr_cpus = 1;
1325
Paul Mackerrascbe46552009-03-24 16:52:34 +11001326 parse_symbols();
1327 if (vmlinux && sym_filter_entry)
1328 parse_vmlinux(vmlinux);
1329
Ingo Molnare0143ba2009-03-23 21:29:59 +01001330 for (i = 0; i < nr_cpus; i++) {
1331 group_fd = -1;
1332 for (counter = 0; counter < nr_counters; counter++) {
1333
1334 cpu = profile_cpu;
1335 if (tid == -1 && profile_cpu == -1)
1336 cpu = i;
1337
1338 memset(&hw_event, 0, sizeof(hw_event));
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001339 hw_event.config = event_id[counter];
Ingo Molnare0143ba2009-03-23 21:29:59 +01001340 hw_event.irq_period = event_count[counter];
Peter Zijlstra3df70fd2009-04-02 11:12:02 +02001341 hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001342 hw_event.nmi = nmi;
Peter Zijlstra3c1ba6f2009-03-30 19:07:06 +02001343 hw_event.mmap = use_mmap;
1344 hw_event.munmap = use_munmap;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001345
Ingo Molnare0143ba2009-03-23 21:29:59 +01001346 fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001347 if (fd[i][counter] < 0) {
Paul Mackerrascbe46552009-03-24 16:52:34 +11001348 int err = errno;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001349 printf("kerneltop error: syscall returned with %d (%s)\n",
Paul Mackerrascbe46552009-03-24 16:52:34 +11001350 fd[i][counter], strerror(err));
1351 if (err == EPERM)
Ingo Molnare0143ba2009-03-23 21:29:59 +01001352 printf("Are you root?\n");
1353 exit(-1);
1354 }
1355 assert(fd[i][counter] >= 0);
Paul Mackerrascbe46552009-03-24 16:52:34 +11001356 fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001357
1358 /*
1359 * First counter acts as the group leader:
1360 */
1361 if (group && group_fd == -1)
1362 group_fd = fd[i][counter];
1363
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001364 event_array[nr_poll].fd = fd[i][counter];
1365 event_array[nr_poll].events = POLLIN;
1366 nr_poll++;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001367
1368 mmap_array[i][counter].counter = counter;
1369 mmap_array[i][counter].prev = 0;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001370 mmap_array[i][counter].mask = mmap_pages*page_size - 1;
1371 mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001372 PROT_READ, MAP_SHARED, fd[i][counter], 0);
1373 if (mmap_array[i][counter].base == MAP_FAILED) {
1374 printf("kerneltop error: failed to mmap with %d (%s)\n",
1375 errno, strerror(errno));
1376 exit(-1);
1377 }
Ingo Molnare0143ba2009-03-23 21:29:59 +01001378 }
1379 }
1380
Mike Galbraith9dd49982009-03-27 12:13:43 +01001381 if (pthread_create(&thread, NULL, display_thread, NULL)) {
1382 printf("Could not create display thread.\n");
1383 exit(-1);
1384 }
1385
1386 if (realtime_prio) {
1387 struct sched_param param;
1388
1389 param.sched_priority = realtime_prio;
1390 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1391 printf("Could not set realtime priority.\n");
1392 exit(-1);
1393 }
1394 }
Ingo Molnare0143ba2009-03-23 21:29:59 +01001395
1396 while (1) {
1397 int hits = events;
1398
1399 for (i = 0; i < nr_cpus; i++) {
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001400 for (counter = 0; counter < nr_counters; counter++)
1401 mmap_read(&mmap_array[i][counter]);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001402 }
1403
Ingo Molnare0143ba2009-03-23 21:29:59 +01001404 if (hits == events)
Mike Galbraith9dd49982009-03-27 12:13:43 +01001405 ret = poll(event_array, nr_poll, 100);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001406 }
1407
1408 return 0;
1409}