blob: 3e45bf6591b2e256b51300596cba872c925e6ae7 [file] [log] [blame]
/*
 * kerneltop.c: show top kernel functions - performance counters showcase

   Build with:

     cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt

   Sample output:

------------------------------------------------------------------------------
 KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2)
------------------------------------------------------------------------------

   weight   RIP                kernel function
   ______   ________________   _______________

   35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
   33.00 - ffffffff804cb740 : sock_alloc_send_skb
   31.26 - ffffffff804ce808 : skb_push
   22.43 - ffffffff80510004 : tcp_established_options
   19.00 - ffffffff8027d250 : find_get_page
   15.76 - ffffffff804e4fc9 : eth_type_trans
   15.20 - ffffffff804d8baa : dst_release
   14.86 - ffffffff804cf5d8 : skb_release_head_state
   14.00 - ffffffff802217d5 : read_hpet
   12.00 - ffffffff804ffb7f : __ip_local_out
   11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
    8.54 - ffffffff805001a3 : ip_queue_xmit
 */
Wu Fengguangf7524bd2009-03-20 10:08:06 +080030
/*
 * perfstat: /usr/bin/time -alike performance counter statistics utility

   It summarizes the counter events of all tasks (and child tasks),
   covering all CPUs that the command (or workload) executes on.
   It only counts the per-task events of the workload started,
   independent of how many other tasks run on those CPUs.

   Sample output:

   $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null

   Performance counter stats for 'ls':

       163516953 instructions
            2295 cache-misses
         2855182 branch-misses
 */
49
/*
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
63
Ingo Molnare0143ba2009-03-23 21:29:59 +010064#define _GNU_SOURCE
65#include <sys/types.h>
66#include <sys/stat.h>
67#include <sys/time.h>
68#include <unistd.h>
69#include <stdint.h>
70#include <stdlib.h>
71#include <string.h>
Paul Mackerrascbe46552009-03-24 16:52:34 +110072#include <limits.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010073#include <getopt.h>
74#include <assert.h>
75#include <fcntl.h>
76#include <stdio.h>
77#include <errno.h>
78#include <ctype.h>
79#include <time.h>
80
Ingo Molnare0143ba2009-03-23 21:29:59 +010081#include <sys/syscall.h>
82#include <sys/ioctl.h>
83#include <sys/poll.h>
84#include <sys/prctl.h>
85#include <sys/wait.h>
86#include <sys/uio.h>
Peter Zijlstrabcbcb372009-03-23 18:22:12 +010087#include <sys/mman.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010088
89#include <linux/unistd.h>
Paul Mackerrascbe46552009-03-24 16:52:34 +110090#include <linux/types.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010091
Ingo Molnar383c5f82009-03-23 21:49:25 +010092#include "../../include/linux/perf_counter.h"
Ingo Molnare0143ba2009-03-23 21:29:59 +010093
Wu Fengguangf7524bd2009-03-20 10:08:06 +080094
/*
 * prctl() request numbers acting on the counters of the current task:
 * PR_TASK_PERF_COUNTERS_DISABLE will (cheaply) disable all of them,
 * PR_TASK_PERF_COUNTERS_ENABLE is the companion enable request.
 */
#define PR_TASK_PERF_COUNTERS_DISABLE	31
#define PR_TASK_PERF_COUNTERS_ENABLE	32

/* Element count of a real array (meaningless when applied to a pointer). */
#define ARRAY_SIZE(x)	(sizeof(x) / sizeof((x)[0]))
103
/*
 * Read CLOCK_MONOTONIC and yield the timestamp in nanoseconds.
 *
 * Implemented as a GNU statement expression; the return value of
 * clock_gettime() is not checked.
 */
#define rdclock()							\
({									\
	struct timespec rdclock_now_;					\
									\
	clock_gettime(CLOCK_MONOTONIC, &rdclock_now_);			\
	rdclock_now_.tv_sec * 1000000000ULL + rdclock_now_.tv_nsec;	\
})
111
/*
 * Pick up some kernel type conventions: both annotations expand to
 * nothing in this user-space program.
 */
#define __user
#define asmlinkage

/*
 * Per-architecture definitions:
 *
 *   __NR_perf_counter_open - system call number used by
 *                            sys_perf_counter_open()
 *   rmb()                  - barrier instruction plus compiler barrier
 *   cpu_relax()            - "rep; nop" on x86, a pure compiler barrier
 *                            on powerpc
 *
 * Note that cpu_relax() expands with a trailing semicolon of its own.
 */
#if defined(__x86_64__)
# define __NR_perf_counter_open	295
# define rmb()			asm volatile("lfence" ::: "memory")
# define cpu_relax()		asm volatile("rep; nop" ::: "memory");
#endif

#if defined(__i386__)
# define __NR_perf_counter_open	333
# define rmb()			asm volatile("lfence" ::: "memory")
# define cpu_relax()		asm volatile("rep; nop" ::: "memory");
#endif

#if defined(__powerpc__)
# define __NR_perf_counter_open	319
# define rmb()			asm volatile ("sync" ::: "memory")
# define cpu_relax()		asm volatile ("" ::: "memory");
#endif

/* Tell the compiler that the condition is expected to be false. */
#define unlikely(x)		__builtin_expect(!!(x), 0)
137
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100138asmlinkage int sys_perf_counter_open(
139 struct perf_counter_hw_event *hw_event_uptr __user,
140 pid_t pid,
141 int cpu,
142 int group_fd,
143 unsigned long flags)
144{
Paul Mackerrascbe46552009-03-24 16:52:34 +1100145 return syscall(
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100146 __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100147}
148
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800149#define MAX_COUNTERS 64
150#define MAX_NR_CPUS 256
151
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100152#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800153
154static int run_perfstat = 0;
155static int system_wide = 0;
156
157static int nr_counters = 0;
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100158static __u64 event_id[MAX_COUNTERS] = {
159 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
Paul Mackerrascbe46552009-03-24 16:52:34 +1100160 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100161 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
162 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
163
164 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
165 EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
166 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
167 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
168};
169static int default_interval = 100000;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800170static int event_count[MAX_COUNTERS];
171static int fd[MAX_NR_CPUS][MAX_COUNTERS];
Ingo Molnare0143ba2009-03-23 21:29:59 +0100172
Ingo Molnare0143ba2009-03-23 21:29:59 +0100173static __u64 count_filter = 100;
174
Ingo Molnare0143ba2009-03-23 21:29:59 +0100175static int tid = -1;
176static int profile_cpu = -1;
177static int nr_cpus = 0;
178static int nmi = 1;
179static int group = 0;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +0100180static unsigned int page_size;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +0100181static unsigned int mmap_pages = 4;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100182
183static char *vmlinux;
184
185static char *sym_filter;
186static unsigned long filter_start;
187static unsigned long filter_end;
188
189static int delay_secs = 2;
190static int zero;
191static int dump_symtab;
192
/*
 * One line of disassembler output (see parse_vmlinux()), kept on a
 * singly linked list in the order it was read.
 */
struct source_line {
	uint64_t		EIP;	/* address parsed from the line, 0 if none was parsed */
	unsigned long		count;	/* hits recorded for exactly this address */
	char			*line;	/* text of the line */
	struct source_line	*next;	/* following line, NULL at the end */
};

/* List head, and the link through which the next node gets appended. */
static struct source_line	*lines;
static struct source_line	**lines_tail;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800202
/*
 * Table of default event counts.
 * NOTE(review): presumably indexed by hardware event id; no user of this
 * table is visible in this part of the file - confirm.
 */
const unsigned int default_count[] = {
	1000000, 1000000,
	  10000,   10000,
	1000000,   10000,
};
211
/* Display names of hardware events, looked up by event id in event_name(). */
static char *hw_event_names[] = {
	/* 0 */ "CPU cycles",
	/* 1 */ "instructions",
	/* 2 */ "cache references",
	/* 3 */ "cache misses",
	/* 4 */ "branches",
	/* 5 */ "branch misses",
	/* 6 */ "bus cycles",
};
221
/* Display names of software events, looked up by event id in event_name(). */
static char *sw_event_names[] = {
	/* 0 */ "cpu clock ticks",
	/* 1 */ "task clock ticks",
	/* 2 */ "pagefaults",
	/* 3 */ "context switches",
	/* 4 */ "CPU migrations",
	/* 5 */ "minor faults",
	/* 6 */ "major faults",
};
231
232struct event_symbol {
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100233 __u64 event;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800234 char *symbol;
235};
236
237static struct event_symbol event_symbols[] = {
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100238 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
239 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
240 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
241 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
242 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
243 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
244 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
245 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
246 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },
247
248 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
249 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
250 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
251 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
252 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
253 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
254 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
255 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
256 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
257 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800258};
259
/*
 * Extract one bit field from an event config value: mask with
 * PERF_COUNTER_<name>_MASK, then shift down by PERF_COUNTER_<name>_SHIFT.
 *
 * The argument is parenthesized so that an expression (e.g. "a | b") is
 * masked as a whole instead of being torn apart by operator precedence.
 */
#define __PERF_COUNTER_FIELD(config, name) \
	(((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

/* Accessors for the individual fields of an event config value. */
#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
267
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800268static void display_events_help(void)
269{
270 unsigned int i;
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100271 __u64 e;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800272
273 printf(
274 " -e EVENT --event=EVENT # symbolic-name abbreviations");
275
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100276 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
277 int type, id;
278
279 e = event_symbols[i].event;
280 type = PERF_COUNTER_TYPE(e);
281 id = PERF_COUNTER_ID(e);
282
283 printf("\n %d:%d: %-20s",
284 type, id, event_symbols[i].symbol);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800285 }
286
287 printf("\n"
288 " rNNN: raw PMU events (eventsel+umask)\n\n");
289}
290
291static void display_perfstat_help(void)
292{
293 printf(
294 "Usage: perfstat [<events...>] <cmd...>\n\n"
295 "PerfStat Options (up to %d event types can be specified):\n\n",
296 MAX_COUNTERS);
297
298 display_events_help();
299
300 printf(
301 " -a # system-wide collection\n");
302 exit(0);
303}
Ingo Molnare0143ba2009-03-23 21:29:59 +0100304
305static void display_help(void)
306{
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800307 if (run_perfstat)
308 return display_perfstat_help();
309
Ingo Molnare0143ba2009-03-23 21:29:59 +0100310 printf(
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800311 "Usage: kerneltop [<options>]\n"
312 " Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100313 "KernelTop Options (up to %d event types can be specified at once):\n\n",
314 MAX_COUNTERS);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800315
316 display_events_help();
317
Ingo Molnare0143ba2009-03-23 21:29:59 +0100318 printf(
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800319 " -S --stat # perfstat COMMAND\n"
320 " -a # system-wide collection (for perfstat)\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100321 " -c CNT --count=CNT # event period to sample\n\n"
322 " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n"
323 " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n"
324 " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800325 " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100326 " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800327 " -x path --vmlinux=<path> # the vmlinux binary, required for -s use\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100328 " -z --zero # zero counts after display\n"
329 " -D --dump_symtab # dump symbol table to stderr on startup\n"
Peter Zijlstra4c4ba212009-03-25 12:30:26 +0100330 " -m pages --mmap_pages=<pages> # number of mmap data pages\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800331 );
Ingo Molnare0143ba2009-03-23 21:29:59 +0100332
333 exit(0);
334}
335
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800336static char *event_name(int ctr)
337{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100338 __u64 config = event_id[ctr];
339 int type = PERF_COUNTER_TYPE(config);
340 int id = PERF_COUNTER_ID(config);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800341 static char buf[32];
342
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100343 if (PERF_COUNTER_RAW(config)) {
344 sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800345 return buf;
346 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800347
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100348 switch (type) {
349 case PERF_TYPE_HARDWARE:
350 if (id < PERF_HW_EVENTS_MAX)
351 return hw_event_names[id];
352 return "unknown-hardware";
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800353
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100354 case PERF_TYPE_SOFTWARE:
355 if (id < PERF_SW_EVENTS_MAX)
356 return sw_event_names[id];
357 return "unknown-software";
358
359 default:
360 break;
361 }
362
363 return "unknown";
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800364}
365
366/*
367 * Each event can have multiple symbolic names.
368 * Symbolic names are (almost) exactly matched.
369 */
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100370static __u64 match_event_symbols(char *str)
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800371{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100372 __u64 config, id;
373 int type;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800374 unsigned int i;
375
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100376 if (sscanf(str, "r%llx", &config) == 1)
377 return config | PERF_COUNTER_RAW_MASK;
378
379 if (sscanf(str, "%d:%llu", &type, &id) == 2)
380 return EID(type, id);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800381
382 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
383 if (!strncmp(str, event_symbols[i].symbol,
384 strlen(event_symbols[i].symbol)))
385 return event_symbols[i].event;
386 }
387
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100388 return ~0ULL;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800389}
390
391static int parse_events(char *str)
392{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100393 __u64 config;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800394
395again:
396 if (nr_counters == MAX_COUNTERS)
397 return -1;
398
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100399 config = match_event_symbols(str);
400 if (config == ~0ULL)
401 return -1;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800402
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100403 event_id[nr_counters] = config;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800404 nr_counters++;
405
406 str = strstr(str, ",");
407 if (str) {
408 str++;
409 goto again;
410 }
411
412 return 0;
413}
414
415
/*
 * perfstat
 */

/*
 * NOTE(review): this buffer is not referenced anywhere in the visible
 * part of the file; its purpose is unclear - confirm before touching it.
 */
char fault_here[1000000];
421
422static void create_perfstat_counter(int counter)
423{
424 struct perf_counter_hw_event hw_event;
425
426 memset(&hw_event, 0, sizeof(hw_event));
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100427 hw_event.config = event_id[counter];
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800428 hw_event.record_type = PERF_RECORD_SIMPLE;
429 hw_event.nmi = 0;
430
431 if (system_wide) {
432 int cpu;
433 for (cpu = 0; cpu < nr_cpus; cpu ++) {
434 fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
435 if (fd[cpu][counter] < 0) {
436 printf("perfstat error: syscall returned with %d (%s)\n",
437 fd[cpu][counter], strerror(errno));
438 exit(-1);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100439 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800440 }
441 } else {
442 hw_event.inherit = 1;
443 hw_event.disabled = 1;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100444
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800445 fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
446 if (fd[0][counter] < 0) {
447 printf("perfstat error: syscall returned with %d (%s)\n",
448 fd[0][counter], strerror(errno));
449 exit(-1);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100450 }
451 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800452}
Ingo Molnare0143ba2009-03-23 21:29:59 +0100453
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800454int do_perfstat(int argc, char *argv[])
455{
456 unsigned long long t0, t1;
457 int counter;
458 ssize_t res;
459 int status;
460 int pid;
461
462 if (!system_wide)
463 nr_cpus = 1;
464
465 for (counter = 0; counter < nr_counters; counter++)
466 create_perfstat_counter(counter);
467
468 argc -= optind;
469 argv += optind;
470
Wu Fengguangaf9522c2009-03-20 10:08:10 +0800471 if (!argc)
472 display_help();
473
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800474 /*
475 * Enable counters and exec the command:
476 */
477 t0 = rdclock();
478 prctl(PR_TASK_PERF_COUNTERS_ENABLE);
479
480 if ((pid = fork()) < 0)
481 perror("failed to fork");
482 if (!pid) {
483 if (execvp(argv[0], argv)) {
484 perror(argv[0]);
485 exit(-1);
486 }
Wu Fengguang95bb3be2009-03-20 10:08:04 +0800487 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800488 while (wait(&status) >= 0)
489 ;
490 prctl(PR_TASK_PERF_COUNTERS_DISABLE);
491 t1 = rdclock();
492
493 fflush(stdout);
494
495 fprintf(stderr, "\n");
496 fprintf(stderr, " Performance counter stats for \'%s\':\n",
497 argv[0]);
498 fprintf(stderr, "\n");
Ingo Molnare0143ba2009-03-23 21:29:59 +0100499
500 for (counter = 0; counter < nr_counters; counter++) {
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800501 int cpu;
502 __u64 count, single_count;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100503
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800504 count = 0;
505 for (cpu = 0; cpu < nr_cpus; cpu ++) {
506 res = read(fd[cpu][counter],
507 (char *) &single_count, sizeof(single_count));
508 assert(res == sizeof(single_count));
509 count += single_count;
510 }
511
Paul Mackerrascbe46552009-03-24 16:52:34 +1100512 if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
513 event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800514
515 double msecs = (double)count / 1000000;
516
517 fprintf(stderr, " %14.6f %-20s (msecs)\n",
518 msecs, event_name(counter));
519 } else {
520 fprintf(stderr, " %14Ld %-20s (events)\n",
521 count, event_name(counter));
522 }
Ingo Molnare0143ba2009-03-23 21:29:59 +0100523 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800524 fprintf(stderr, "\n");
525 fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
526 (double)(t1-t0)/1e6);
527 fprintf(stderr, "\n");
528
529 return 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100530}
531
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800532/*
533 * Symbols
534 */
535
Ingo Molnare0143ba2009-03-23 21:29:59 +0100536static uint64_t min_ip;
537static uint64_t max_ip = -1ll;
538
539struct sym_entry {
540 unsigned long long addr;
541 char *sym;
542 unsigned long count[MAX_COUNTERS];
543 int skip;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100544 struct source_line *source;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100545};
546
547#define MAX_SYMS 100000
548
549static int sym_table_count;
550
551struct sym_entry *sym_filter_entry;
552
553static struct sym_entry sym_table[MAX_SYMS];
554
555static void show_details(struct sym_entry *sym);
556
557/*
Wu Fengguangef45fa92009-03-20 10:08:07 +0800558 * Ordering weight: count-1 * count-2 * ... / count-n
Ingo Molnare0143ba2009-03-23 21:29:59 +0100559 */
560static double sym_weight(const struct sym_entry *sym)
561{
562 double weight;
563 int counter;
564
565 weight = sym->count[0];
566
567 for (counter = 1; counter < nr_counters-1; counter++)
568 weight *= sym->count[counter];
569
570 weight /= (sym->count[counter] + 1);
571
572 return weight;
573}
574
/*
 * qsort() comparator ordering sym_entry records by descending weight.
 *
 * Returns a negative, zero or positive value as qsort() requires; the
 * previous "w1 < w2" form could only return 0 or 1 and therefore never
 * reported that the first element sorts before the second.
 */
static int compare(const void *__sym1, const void *__sym2)
{
	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
	double w1 = sym_weight(sym1);
	double w2 = sym_weight(sym2);

	if (w1 < w2)
		return 1;
	if (w1 > w2)
		return -1;
	return 0;
}
581
/* Time of the last display refresh, as set by print_sym_table(). */
static time_t			last_refresh;

/* Events processed in total, and how many of them fell outside the symbol range. */
static long			events;
static long			userspace_events;

/*
 * Terminal sequence written before each refresh: ESC [ H (cursor home)
 * followed by ESC [ 2 J (erase display).  Spelled with explicit escapes
 * so the control bytes cannot get lost; an empty string here would turn
 * the clear into a no-op.
 */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";
586
587static struct sym_entry tmp[MAX_SYMS];
588
589static void print_sym_table(void)
590{
591 int i, printed;
592 int counter;
593 float events_per_sec = events/delay_secs;
594 float kevents_per_sec = (events-userspace_events)/delay_secs;
595
596 memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count);
597 qsort(tmp, sym_table_count, sizeof(tmp[0]), compare);
598
599 write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
600
601 printf(
602"------------------------------------------------------------------------------\n");
603 printf( " KernelTop:%8.0f irqs/sec kernel:%3.1f%% [%s, ",
604 events_per_sec,
605 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)),
606 nmi ? "NMI" : "IRQ");
607
608 if (nr_counters == 1)
609 printf("%d ", event_count[0]);
610
611 for (counter = 0; counter < nr_counters; counter++) {
612 if (counter)
613 printf("/");
614
Wu Fengguange3908612009-03-20 10:08:05 +0800615 printf("%s", event_name(counter));
Ingo Molnare0143ba2009-03-23 21:29:59 +0100616 }
617
618 printf( "], ");
619
620 if (tid != -1)
621 printf(" (tid: %d", tid);
622 else
623 printf(" (all");
624
625 if (profile_cpu != -1)
626 printf(", cpu: %d)\n", profile_cpu);
627 else {
628 if (tid != -1)
629 printf(")\n");
630 else
631 printf(", %d CPUs)\n", nr_cpus);
632 }
633
634 printf("------------------------------------------------------------------------------\n\n");
635
636 if (nr_counters == 1)
637 printf(" events");
638 else
639 printf(" weight events");
640
641 printf(" RIP kernel function\n"
642 " ______ ______ ________________ _______________\n\n"
643 );
644
645 printed = 0;
646 for (i = 0; i < sym_table_count; i++) {
647 int count;
648
649 if (nr_counters == 1) {
650 if (printed <= 18 &&
651 tmp[i].count[0] >= count_filter) {
652 printf("%19.2f - %016llx : %s\n",
653 sym_weight(tmp + i), tmp[i].addr, tmp[i].sym);
654 printed++;
655 }
656 } else {
657 if (printed <= 18 &&
658 tmp[i].count[0] >= count_filter) {
659 printf("%8.1f %10ld - %016llx : %s\n",
660 sym_weight(tmp + i),
661 tmp[i].count[0],
662 tmp[i].addr, tmp[i].sym);
663 printed++;
664 }
665 }
666 /*
667 * Add decay to the counts:
668 */
669 for (count = 0; count < nr_counters; count++)
670 sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8;
671 }
672
673 if (sym_filter_entry)
674 show_details(sym_filter_entry);
675
676 last_refresh = time(NULL);
677
678 {
679 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
680
681 if (poll(&stdin_poll, 1, 0) == 1) {
682 printf("key pressed - exiting.\n");
683 exit(0);
684 }
685 }
686}
687
688static int read_symbol(FILE *in, struct sym_entry *s)
689{
690 static int filter_match = 0;
691 char *sym, stype;
692 char str[500];
693 int rc, pos;
694
695 rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str);
696 if (rc == EOF)
697 return -1;
698
699 assert(rc == 3);
700
701 /* skip until end of line: */
702 pos = strlen(str);
703 do {
704 rc = fgetc(in);
705 if (rc == '\n' || rc == EOF || pos >= 499)
706 break;
707 str[pos] = rc;
708 pos++;
709 } while (1);
710 str[pos] = 0;
711
712 sym = str;
713
714 /* Filter out known duplicates and non-text symbols. */
715 if (!strcmp(sym, "_text"))
716 return 1;
717 if (!min_ip && !strcmp(sym, "_stext"))
718 return 1;
719 if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext"))
720 return 1;
721 if (stype != 'T' && stype != 't')
722 return 1;
723 if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14))
724 return 1;
725 if (strstr(sym, "_text_start") || strstr(sym, "_text_end"))
726 return 1;
727
728 s->sym = malloc(strlen(str));
729 assert(s->sym);
730
731 strcpy((char *)s->sym, str);
732 s->skip = 0;
733
734 /* Tag events to be skipped. */
735 if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
736 s->skip = 1;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +0100737 else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
738 s->skip = 1;
739 else if (!strcmp("mwait_idle", s->sym))
Ingo Molnare0143ba2009-03-23 21:29:59 +0100740 s->skip = 1;
741
742 if (filter_match == 1) {
743 filter_end = s->addr;
744 filter_match = -1;
745 if (filter_end - filter_start > 10000) {
746 printf("hm, too large filter symbol <%s> - skipping.\n",
747 sym_filter);
748 printf("symbol filter start: %016lx\n", filter_start);
749 printf(" end: %016lx\n", filter_end);
750 filter_end = filter_start = 0;
751 sym_filter = NULL;
752 sleep(1);
753 }
754 }
755 if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) {
756 filter_match = 1;
757 filter_start = s->addr;
758 }
759
760 return 0;
761}
762
763int compare_addr(const void *__sym1, const void *__sym2)
764{
765 const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
766
767 return sym1->addr > sym2->addr;
768}
769
770static void sort_symbol_table(void)
771{
772 int i, dups;
773
774 do {
775 qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr);
776 for (i = 0, dups = 0; i < sym_table_count; i++) {
777 if (sym_table[i].addr == sym_table[i+1].addr) {
778 sym_table[i+1].addr = -1ll;
779 dups++;
780 }
781 }
782 sym_table_count -= dups;
783 } while(dups);
784}
785
786static void parse_symbols(void)
787{
788 struct sym_entry *last;
789
790 FILE *kallsyms = fopen("/proc/kallsyms", "r");
791
792 if (!kallsyms) {
793 printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n");
794 exit(-1);
795 }
796
797 while (!feof(kallsyms)) {
798 if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) {
799 sym_table_count++;
800 assert(sym_table_count <= MAX_SYMS);
801 }
802 }
803
804 sort_symbol_table();
805 min_ip = sym_table[0].addr;
806 max_ip = sym_table[sym_table_count-1].addr;
807 last = sym_table + sym_table_count++;
808
809 last->addr = -1ll;
810 last->sym = "<end>";
811
812 if (filter_end) {
813 int count;
814 for (count=0; count < sym_table_count; count ++) {
815 if (!strcmp(sym_table[count].sym, sym_filter)) {
816 sym_filter_entry = &sym_table[count];
817 break;
818 }
819 }
820 }
821 if (dump_symtab) {
822 int i;
823
824 for (i = 0; i < sym_table_count; i++)
825 fprintf(stderr, "%llx %s\n",
826 sym_table[i].addr, sym_table[i].sym);
827 }
828}
829
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800830/*
831 * Source lines
832 */
Ingo Molnare0143ba2009-03-23 21:29:59 +0100833
834static void parse_vmlinux(char *filename)
835{
836 FILE *file;
837 char command[PATH_MAX*2];
838 if (!filename)
839 return;
840
841 sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename);
842
843 file = popen(command, "r");
844 if (!file)
845 return;
846
Paul Mackerrascbe46552009-03-24 16:52:34 +1100847 lines_tail = &lines;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100848 while (!feof(file)) {
849 struct source_line *src;
850 size_t dummy = 0;
851 char *c;
852
853 src = malloc(sizeof(struct source_line));
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800854 assert(src != NULL);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100855 memset(src, 0, sizeof(struct source_line));
856
857 if (getline(&src->line, &dummy, file) < 0)
858 break;
859 if (!src->line)
860 break;
861
862 c = strchr(src->line, '\n');
863 if (c)
864 *c = 0;
865
Paul Mackerrascbe46552009-03-24 16:52:34 +1100866 src->next = NULL;
867 *lines_tail = src;
868 lines_tail = &src->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100869
870 if (strlen(src->line)>8 && src->line[8] == ':')
871 src->EIP = strtoull(src->line, NULL, 16);
872 if (strlen(src->line)>8 && src->line[16] == ':')
873 src->EIP = strtoull(src->line, NULL, 16);
874 }
875 pclose(file);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100876}
877
878static void record_precise_ip(uint64_t ip)
879{
880 struct source_line *line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100881
Paul Mackerrascbe46552009-03-24 16:52:34 +1100882 for (line = lines; line; line = line->next) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100883 if (line->EIP == ip)
884 line->count++;
885 if (line->EIP > ip)
886 break;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100887 }
888}
889
890static void lookup_sym_in_vmlinux(struct sym_entry *sym)
891{
892 struct source_line *line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100893 char pattern[PATH_MAX];
894 sprintf(pattern, "<%s>:", sym->sym);
895
Paul Mackerrascbe46552009-03-24 16:52:34 +1100896 for (line = lines; line; line = line->next) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100897 if (strstr(line->line, pattern)) {
Paul Mackerrascbe46552009-03-24 16:52:34 +1100898 sym->source = line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100899 break;
900 }
Ingo Molnare0143ba2009-03-23 21:29:59 +0100901 }
902}
903
Paul Mackerrascbe46552009-03-24 16:52:34 +1100904static void show_lines(struct source_line *line_queue, int line_queue_count)
Ingo Molnare0143ba2009-03-23 21:29:59 +0100905{
906 int i;
907 struct source_line *line;
908
Paul Mackerrascbe46552009-03-24 16:52:34 +1100909 line = line_queue;
910 for (i = 0; i < line_queue_count; i++) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100911 printf("%8li\t%s\n", line->count, line->line);
Paul Mackerrascbe46552009-03-24 16:52:34 +1100912 line = line->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100913 }
914}
915
916#define TRACE_COUNT 3
917
918static void show_details(struct sym_entry *sym)
919{
920 struct source_line *line;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100921 struct source_line *line_queue = NULL;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100922 int displayed = 0;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100923 int line_queue_count = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100924
925 if (!sym->source)
926 lookup_sym_in_vmlinux(sym);
927 if (!sym->source)
928 return;
929
930 printf("Showing details for %s\n", sym->sym);
931
Paul Mackerrascbe46552009-03-24 16:52:34 +1100932 line = sym->source;
933 while (line) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100934 if (displayed && strstr(line->line, ">:"))
935 break;
936
Paul Mackerrascbe46552009-03-24 16:52:34 +1100937 if (!line_queue_count)
938 line_queue = line;
939 line_queue_count ++;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100940
941 if (line->count >= count_filter) {
Paul Mackerrascbe46552009-03-24 16:52:34 +1100942 show_lines(line_queue, line_queue_count);
943 line_queue_count = 0;
944 line_queue = NULL;
945 } else if (line_queue_count > TRACE_COUNT) {
946 line_queue = line_queue->next;
947 line_queue_count --;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100948 }
949
950 line->count = 0;
951 displayed++;
952 if (displayed > 300)
953 break;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100954 line = line->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100955 }
956}
957
958/*
959 * Binary search in the histogram table and record the hit:
960 */
961static void record_ip(uint64_t ip, int counter)
962{
963 int left_idx, middle_idx, right_idx, idx;
964 unsigned long left, middle, right;
965
966 record_precise_ip(ip);
967
968 left_idx = 0;
969 right_idx = sym_table_count-1;
970 assert(ip <= max_ip && ip >= min_ip);
971
972 while (left_idx + 1 < right_idx) {
973 middle_idx = (left_idx + right_idx) / 2;
974
975 left = sym_table[ left_idx].addr;
976 middle = sym_table[middle_idx].addr;
977 right = sym_table[ right_idx].addr;
978
979 if (!(left <= middle && middle <= right)) {
980 printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right);
981 printf("%d %d %d\n", left_idx, middle_idx, right_idx);
982 }
983 assert(left <= middle && middle <= right);
984 if (!(left <= ip && ip <= right)) {
985 printf(" left: %016lx\n", left);
Ingo Molnar193e8df2009-03-23 22:23:16 +0100986 printf(" ip: %016lx\n", (unsigned long)ip);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100987 printf("right: %016lx\n", right);
988 }
989 assert(left <= ip && ip <= right);
990 /*
991 * [ left .... target .... middle .... right ]
992 * => right := middle
993 */
994 if (ip < middle) {
995 right_idx = middle_idx;
996 continue;
997 }
998 /*
999 * [ left .... middle ... target ... right ]
1000 * => left := middle
1001 */
1002 left_idx = middle_idx;
1003 }
1004
1005 idx = left_idx;
1006
1007 if (!sym_table[idx].skip)
1008 sym_table[idx].count[counter]++;
1009 else events--;
1010}
1011
1012static void process_event(uint64_t ip, int counter)
1013{
1014 events++;
1015
1016 if (ip < min_ip || ip > max_ip) {
1017 userspace_events++;
1018 return;
1019 }
1020
1021 record_ip(ip, counter);
1022}
1023
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001024static void process_options(int argc, char *argv[])
1025{
1026 int error = 0, counter;
1027
1028 if (strstr(argv[0], "perfstat"))
1029 run_perfstat = 1;
1030
1031 for (;;) {
1032 int option_index = 0;
1033 /** Options for getopt */
1034 static struct option long_options[] = {
1035 {"count", required_argument, NULL, 'c'},
1036 {"cpu", required_argument, NULL, 'C'},
1037 {"delay", required_argument, NULL, 'd'},
1038 {"dump_symtab", no_argument, NULL, 'D'},
1039 {"event", required_argument, NULL, 'e'},
1040 {"filter", required_argument, NULL, 'f'},
1041 {"group", required_argument, NULL, 'g'},
1042 {"help", no_argument, NULL, 'h'},
1043 {"nmi", required_argument, NULL, 'n'},
1044 {"pid", required_argument, NULL, 'p'},
1045 {"vmlinux", required_argument, NULL, 'x'},
1046 {"symbol", required_argument, NULL, 's'},
1047 {"stat", no_argument, NULL, 'S'},
1048 {"zero", no_argument, NULL, 'z'},
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001049 {"mmap_pages", required_argument, NULL, 'm'},
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001050 {NULL, 0, NULL, 0 }
1051 };
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001052 int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:m:p:s:Sx:z",
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001053 long_options, &option_index);
1054 if (c == -1)
1055 break;
1056
1057 switch (c) {
1058 case 'a': system_wide = 1; break;
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001059 case 'c': default_interval = atoi(optarg); break;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001060 case 'C':
1061 /* CPU and PID are mutually exclusive */
1062 if (tid != -1) {
1063 printf("WARNING: CPU switch overriding PID\n");
1064 sleep(1);
1065 tid = -1;
1066 }
1067 profile_cpu = atoi(optarg); break;
1068 case 'd': delay_secs = atoi(optarg); break;
1069 case 'D': dump_symtab = 1; break;
1070
1071 case 'e': error = parse_events(optarg); break;
1072
1073 case 'f': count_filter = atoi(optarg); break;
1074 case 'g': group = atoi(optarg); break;
1075 case 'h': display_help(); break;
1076 case 'n': nmi = atoi(optarg); break;
1077 case 'p':
1078 /* CPU and PID are mutually exclusive */
1079 if (profile_cpu != -1) {
1080 printf("WARNING: PID switch overriding CPU\n");
1081 sleep(1);
1082 profile_cpu = -1;
1083 }
1084 tid = atoi(optarg); break;
1085 case 's': sym_filter = strdup(optarg); break;
1086 case 'S': run_perfstat = 1; break;
1087 case 'x': vmlinux = strdup(optarg); break;
1088 case 'z': zero = 1; break;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001089 case 'm': mmap_pages = atoi(optarg); break;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001090 default: error = 1; break;
1091 }
1092 }
1093 if (error)
1094 display_help();
1095
1096 if (!nr_counters) {
1097 if (run_perfstat)
1098 nr_counters = 8;
1099 else {
1100 nr_counters = 1;
1101 event_id[0] = 0;
1102 }
1103 }
1104
1105 for (counter = 0; counter < nr_counters; counter++) {
1106 if (event_count[counter])
1107 continue;
1108
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001109 event_count[counter] = default_interval;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001110 }
1111}
1112
/*
 * Per-counter state of one mmap()ed sample buffer.
 */
struct mmap_data {
	int		counter;	/* counter index passed on to process_event() */
	void		*base;		/* start of the mapping as returned by mmap() */
	unsigned	mask;		/* data area size in bytes minus one; used to wrap offsets */
	unsigned	prev;		/* position up to which data has already been consumed */
};
1119
1120static unsigned int mmap_read_head(struct mmap_data *md)
1121{
1122 struct perf_counter_mmap_page *pc = md->base;
1123 unsigned int seq, head;
1124
1125repeat:
1126 rmb();
1127 seq = pc->lock;
1128
1129 if (unlikely(seq & 1)) {
1130 cpu_relax();
1131 goto repeat;
1132 }
1133
1134 head = pc->data_head;
1135
1136 rmb();
1137 if (pc->lock != seq)
1138 goto repeat;
1139
1140 return head;
1141}
1142
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001143struct timeval last_read, this_read;
1144
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001145static void mmap_read(struct mmap_data *md)
1146{
1147 unsigned int head = mmap_read_head(md);
1148 unsigned int old = md->prev;
1149 unsigned char *data = md->base + page_size;
1150
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001151 gettimeofday(&this_read, NULL);
1152
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001153 if (head - old > md->mask) {
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001154 struct timeval iv;
1155 unsigned long msecs;
1156
1157 timersub(&this_read, &last_read, &iv);
1158 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
1159
1160 fprintf(stderr, "WARNING: failed to keep up with mmap data. Last read %lu msecs ago.\n", msecs);
1161
1162 old = head;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001163 }
1164
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001165 last_read = this_read;
1166
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001167 for (; old != head;) {
1168 __u64 *ptr = (__u64 *)&data[old & md->mask];
1169 old += sizeof(__u64);
1170
1171 process_event(*ptr, md->counter);
1172 }
1173
1174 md->prev = old;
1175}
1176
Ingo Molnare0143ba2009-03-23 21:29:59 +01001177int main(int argc, char *argv[])
1178{
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001179 struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001180 struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
Ingo Molnare0143ba2009-03-23 21:29:59 +01001181 struct perf_counter_hw_event hw_event;
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001182 int i, counter, group_fd, nr_poll = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001183 unsigned int cpu;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001184 int ret;
1185
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001186 page_size = sysconf(_SC_PAGE_SIZE);
1187
Ingo Molnare0143ba2009-03-23 21:29:59 +01001188 process_options(argc, argv);
1189
1190 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001191 assert(nr_cpus <= MAX_NR_CPUS);
1192 assert(nr_cpus >= 0);
1193
1194 if (run_perfstat)
1195 return do_perfstat(argc, argv);
1196
Ingo Molnare0143ba2009-03-23 21:29:59 +01001197 if (tid != -1 || profile_cpu != -1)
1198 nr_cpus = 1;
1199
Paul Mackerrascbe46552009-03-24 16:52:34 +11001200 parse_symbols();
1201 if (vmlinux && sym_filter_entry)
1202 parse_vmlinux(vmlinux);
1203
Ingo Molnare0143ba2009-03-23 21:29:59 +01001204 for (i = 0; i < nr_cpus; i++) {
1205 group_fd = -1;
1206 for (counter = 0; counter < nr_counters; counter++) {
1207
1208 cpu = profile_cpu;
1209 if (tid == -1 && profile_cpu == -1)
1210 cpu = i;
1211
1212 memset(&hw_event, 0, sizeof(hw_event));
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001213 hw_event.config = event_id[counter];
Ingo Molnare0143ba2009-03-23 21:29:59 +01001214 hw_event.irq_period = event_count[counter];
1215 hw_event.record_type = PERF_RECORD_IRQ;
1216 hw_event.nmi = nmi;
1217
Ingo Molnare0143ba2009-03-23 21:29:59 +01001218 fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001219 if (fd[i][counter] < 0) {
Paul Mackerrascbe46552009-03-24 16:52:34 +11001220 int err = errno;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001221 printf("kerneltop error: syscall returned with %d (%s)\n",
Paul Mackerrascbe46552009-03-24 16:52:34 +11001222 fd[i][counter], strerror(err));
1223 if (err == EPERM)
Ingo Molnare0143ba2009-03-23 21:29:59 +01001224 printf("Are you root?\n");
1225 exit(-1);
1226 }
1227 assert(fd[i][counter] >= 0);
Paul Mackerrascbe46552009-03-24 16:52:34 +11001228 fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001229
1230 /*
1231 * First counter acts as the group leader:
1232 */
1233 if (group && group_fd == -1)
1234 group_fd = fd[i][counter];
1235
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001236 event_array[nr_poll].fd = fd[i][counter];
1237 event_array[nr_poll].events = POLLIN;
1238 nr_poll++;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001239
1240 mmap_array[i][counter].counter = counter;
1241 mmap_array[i][counter].prev = 0;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001242 mmap_array[i][counter].mask = mmap_pages*page_size - 1;
1243 mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001244 PROT_READ, MAP_SHARED, fd[i][counter], 0);
1245 if (mmap_array[i][counter].base == MAP_FAILED) {
1246 printf("kerneltop error: failed to mmap with %d (%s)\n",
1247 errno, strerror(errno));
1248 exit(-1);
1249 }
Ingo Molnare0143ba2009-03-23 21:29:59 +01001250 }
1251 }
1252
Ingo Molnare0143ba2009-03-23 21:29:59 +01001253 printf("KernelTop refresh period: %d seconds\n", delay_secs);
1254 last_refresh = time(NULL);
1255
1256 while (1) {
1257 int hits = events;
1258
1259 for (i = 0; i < nr_cpus; i++) {
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001260 for (counter = 0; counter < nr_counters; counter++)
1261 mmap_read(&mmap_array[i][counter]);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001262 }
1263
1264 if (time(NULL) >= last_refresh + delay_secs) {
1265 print_sym_table();
1266 events = userspace_events = 0;
1267 }
1268
1269 if (hits == events)
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001270 ret = poll(event_array, nr_poll, 1000);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001271 hits = events;
1272 }
1273
1274 return 0;
1275}