blob: fda1438365dc09747eff56d824907b596743dcdb [file] [log] [blame]
Ingo Molnare0143ba2009-03-23 21:29:59 +01001/*
2 * kerneltop.c: show top kernel functions - performance counters showcase
3
4 Build with:
5
Paul Mackerrascbe46552009-03-24 16:52:34 +11006 cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
Ingo Molnare0143ba2009-03-23 21:29:59 +01007
8 Sample output:
9
10------------------------------------------------------------------------------
11 KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2)
12------------------------------------------------------------------------------
13
14 weight RIP kernel function
15 ______ ________________ _______________
16
17 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
18 33.00 - ffffffff804cb740 : sock_alloc_send_skb
19 31.26 - ffffffff804ce808 : skb_push
20 22.43 - ffffffff80510004 : tcp_established_options
21 19.00 - ffffffff8027d250 : find_get_page
22 15.76 - ffffffff804e4fc9 : eth_type_trans
23 15.20 - ffffffff804d8baa : dst_release
24 14.86 - ffffffff804cf5d8 : skb_release_head_state
25 14.00 - ffffffff802217d5 : read_hpet
26 12.00 - ffffffff804ffb7f : __ip_local_out
27 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
28 8.54 - ffffffff805001a3 : ip_queue_xmit
Ingo Molnare0143ba2009-03-23 21:29:59 +010029 */
Wu Fengguangf7524bd2009-03-20 10:08:06 +080030
31/*
32 * perfstat: /usr/bin/time -alike performance counter statistics utility
33
34 It summarizes the counter events of all tasks (and child tasks),
35 covering all CPUs that the command (or workload) executes on.
36 It only counts the per-task events of the workload started,
37 independent of how many other tasks run on those CPUs.
38
39 Sample output:
40
41 $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
42
43 Performance counter stats for 'ls':
44
45 163516953 instructions
46 2295 cache-misses
47 2855182 branch-misses
48 */
49
50 /*
51 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
52 *
53 * Improvements and fixes by:
54 *
55 * Arjan van de Ven <arjan@linux.intel.com>
56 * Yanmin Zhang <yanmin.zhang@intel.com>
57 * Wu Fengguang <fengguang.wu@intel.com>
58 * Mike Galbraith <efault@gmx.de>
Paul Mackerrascbe46552009-03-24 16:52:34 +110059 * Paul Mackerras <paulus@samba.org>
Wu Fengguangf7524bd2009-03-20 10:08:06 +080060 *
61 * Released under the GPL v2. (and only v2, not any later version)
62 */
63
Ingo Molnare0143ba2009-03-23 21:29:59 +010064#define _GNU_SOURCE
65#include <sys/types.h>
66#include <sys/stat.h>
67#include <sys/time.h>
68#include <unistd.h>
69#include <stdint.h>
70#include <stdlib.h>
71#include <string.h>
Paul Mackerrascbe46552009-03-24 16:52:34 +110072#include <limits.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010073#include <getopt.h>
74#include <assert.h>
75#include <fcntl.h>
76#include <stdio.h>
77#include <errno.h>
78#include <ctype.h>
79#include <time.h>
80
Ingo Molnare0143ba2009-03-23 21:29:59 +010081#include <sys/syscall.h>
82#include <sys/ioctl.h>
83#include <sys/poll.h>
84#include <sys/prctl.h>
85#include <sys/wait.h>
86#include <sys/uio.h>
Peter Zijlstrabcbcb372009-03-23 18:22:12 +010087#include <sys/mman.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010088
89#include <linux/unistd.h>
Paul Mackerrascbe46552009-03-24 16:52:34 +110090#include <linux/types.h>
Ingo Molnare0143ba2009-03-23 21:29:59 +010091
Ingo Molnar383c5f82009-03-23 21:49:25 +010092#include "../../include/linux/perf_counter.h"
Ingo Molnare0143ba2009-03-23 21:29:59 +010093
Wu Fengguangf7524bd2009-03-20 10:08:06 +080094
Peter Zijlstra803d4f32009-03-23 18:22:11 +010095/*
96 * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
97 * counters in the current task.
98 */
99#define PR_TASK_PERF_COUNTERS_DISABLE 31
100#define PR_TASK_PERF_COUNTERS_ENABLE 32
101
102#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
103
104#define rdclock() \
105({ \
106 struct timespec ts; \
107 \
108 clock_gettime(CLOCK_MONOTONIC, &ts); \
109 ts.tv_sec * 1000000000ULL + ts.tv_nsec; \
110})
111
112/*
113 * Pick up some kernel type conventions:
114 */
115#define __user
116#define asmlinkage
117
/*
 * Per-architecture glue: the perf_counter_open syscall number, a read
 * memory barrier (rmb) and a busy-wait hint (cpu_relax).
 *
 * The statement-like macros deliberately expand WITHOUT a trailing
 * semicolon, so that "cpu_relax();" is exactly one statement and can be
 * used safely as the body of an unbraced if/else.
 */
#ifdef __x86_64__
#define __NR_perf_counter_open	295
#define rmb()			asm volatile("lfence" ::: "memory")
#define cpu_relax()		asm volatile("rep; nop" ::: "memory")
#endif

#ifdef __i386__
#define __NR_perf_counter_open	333
#define rmb()			asm volatile("lfence" ::: "memory")
#define cpu_relax()		asm volatile("rep; nop" ::: "memory")
#endif

#ifdef __powerpc__
#define __NR_perf_counter_open	319
#define rmb()			asm volatile ("sync" ::: "memory")
#define cpu_relax()		asm volatile ("" ::: "memory")
#endif
135
Peter Zijlstrabcbcb372009-03-23 18:22:12 +0100136#define unlikely(x) __builtin_expect(!!(x), 0)
Peter Zijlstra00f0ad72009-03-25 12:30:27 +0100137#define min(x, y) ({ \
138 typeof(x) _min1 = (x); \
139 typeof(y) _min2 = (y); \
140 (void) (&_min1 == &_min2); \
141 _min1 < _min2 ? _min1 : _min2; })
Peter Zijlstrabcbcb372009-03-23 18:22:12 +0100142
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100143asmlinkage int sys_perf_counter_open(
144 struct perf_counter_hw_event *hw_event_uptr __user,
145 pid_t pid,
146 int cpu,
147 int group_fd,
148 unsigned long flags)
149{
Paul Mackerrascbe46552009-03-24 16:52:34 +1100150 return syscall(
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100151 __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100152}
153
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800154#define MAX_COUNTERS 64
155#define MAX_NR_CPUS 256
156
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100157#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800158
159static int run_perfstat = 0;
160static int system_wide = 0;
161
162static int nr_counters = 0;
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100163static __u64 event_id[MAX_COUNTERS] = {
164 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
Paul Mackerrascbe46552009-03-24 16:52:34 +1100165 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100166 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
167 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
168
169 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
170 EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
171 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
172 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
173};
174static int default_interval = 100000;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800175static int event_count[MAX_COUNTERS];
176static int fd[MAX_NR_CPUS][MAX_COUNTERS];
Ingo Molnare0143ba2009-03-23 21:29:59 +0100177
Ingo Molnare0143ba2009-03-23 21:29:59 +0100178static __u64 count_filter = 100;
179
Ingo Molnare0143ba2009-03-23 21:29:59 +0100180static int tid = -1;
181static int profile_cpu = -1;
182static int nr_cpus = 0;
183static int nmi = 1;
184static int group = 0;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +0100185static unsigned int page_size;
Peter Zijlstra00f0ad72009-03-25 12:30:27 +0100186static unsigned int mmap_pages = 16;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100187
188static char *vmlinux;
189
190static char *sym_filter;
191static unsigned long filter_start;
192static unsigned long filter_end;
193
194static int delay_secs = 2;
195static int zero;
196static int dump_symtab;
197
198struct source_line {
199 uint64_t EIP;
200 unsigned long count;
201 char *line;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100202 struct source_line *next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100203};
204
Paul Mackerrascbe46552009-03-24 16:52:34 +1100205static struct source_line *lines;
206static struct source_line **lines_tail;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800207
208const unsigned int default_count[] = {
Ingo Molnar81cdbe02009-03-23 22:29:50 +0100209 1000000,
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800210 1000000,
211 10000,
212 10000,
213 1000000,
214 10000,
215};
216
217static char *hw_event_names[] = {
218 "CPU cycles",
219 "instructions",
220 "cache references",
221 "cache misses",
222 "branches",
223 "branch misses",
224 "bus cycles",
225};
226
227static char *sw_event_names[] = {
228 "cpu clock ticks",
229 "task clock ticks",
230 "pagefaults",
231 "context switches",
232 "CPU migrations",
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100233 "minor faults",
234 "major faults",
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800235};
236
237struct event_symbol {
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100238 __u64 event;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800239 char *symbol;
240};
241
242static struct event_symbol event_symbols[] = {
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100243 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
244 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
245 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
246 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
247 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
248 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
249 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
250 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
251 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },
252
253 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
254 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
255 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
256 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
257 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
258 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
259 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
260 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
261 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
262 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800263};
264
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100265#define __PERF_COUNTER_FIELD(config, name) \
266 ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
267
268#define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW)
269#define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG)
270#define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE)
271#define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT)
272
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800273static void display_events_help(void)
274{
275 unsigned int i;
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100276 __u64 e;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800277
278 printf(
279 " -e EVENT --event=EVENT # symbolic-name abbreviations");
280
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100281 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
282 int type, id;
283
284 e = event_symbols[i].event;
285 type = PERF_COUNTER_TYPE(e);
286 id = PERF_COUNTER_ID(e);
287
288 printf("\n %d:%d: %-20s",
289 type, id, event_symbols[i].symbol);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800290 }
291
292 printf("\n"
293 " rNNN: raw PMU events (eventsel+umask)\n\n");
294}
295
296static void display_perfstat_help(void)
297{
298 printf(
299 "Usage: perfstat [<events...>] <cmd...>\n\n"
300 "PerfStat Options (up to %d event types can be specified):\n\n",
301 MAX_COUNTERS);
302
303 display_events_help();
304
305 printf(
306 " -a # system-wide collection\n");
307 exit(0);
308}
Ingo Molnare0143ba2009-03-23 21:29:59 +0100309
310static void display_help(void)
311{
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800312 if (run_perfstat)
313 return display_perfstat_help();
314
Ingo Molnare0143ba2009-03-23 21:29:59 +0100315 printf(
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800316 "Usage: kerneltop [<options>]\n"
317 " Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100318 "KernelTop Options (up to %d event types can be specified at once):\n\n",
319 MAX_COUNTERS);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800320
321 display_events_help();
322
Ingo Molnare0143ba2009-03-23 21:29:59 +0100323 printf(
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800324 " -S --stat # perfstat COMMAND\n"
325 " -a # system-wide collection (for perfstat)\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100326 " -c CNT --count=CNT # event period to sample\n\n"
327 " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n"
328 " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n"
329 " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800330 " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100331 " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800332 " -x path --vmlinux=<path> # the vmlinux binary, required for -s use\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100333 " -z --zero # zero counts after display\n"
334 " -D --dump_symtab # dump symbol table to stderr on startup\n"
Peter Zijlstra4c4ba212009-03-25 12:30:26 +0100335 " -m pages --mmap_pages=<pages> # number of mmap data pages\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800336 );
Ingo Molnare0143ba2009-03-23 21:29:59 +0100337
338 exit(0);
339}
340
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800341static char *event_name(int ctr)
342{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100343 __u64 config = event_id[ctr];
344 int type = PERF_COUNTER_TYPE(config);
345 int id = PERF_COUNTER_ID(config);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800346 static char buf[32];
347
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100348 if (PERF_COUNTER_RAW(config)) {
349 sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800350 return buf;
351 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800352
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100353 switch (type) {
354 case PERF_TYPE_HARDWARE:
355 if (id < PERF_HW_EVENTS_MAX)
356 return hw_event_names[id];
357 return "unknown-hardware";
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800358
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100359 case PERF_TYPE_SOFTWARE:
360 if (id < PERF_SW_EVENTS_MAX)
361 return sw_event_names[id];
362 return "unknown-software";
363
364 default:
365 break;
366 }
367
368 return "unknown";
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800369}
370
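/*
 * Each event can have multiple symbolic names.
 * Symbolic names are prefix-matched: a name matches when the input
 * string starts with it, so any trailing text (such as a following
 * ",next-event") is ignored.
 */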
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100375static __u64 match_event_symbols(char *str)
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800376{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100377 __u64 config, id;
378 int type;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800379 unsigned int i;
380
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100381 if (sscanf(str, "r%llx", &config) == 1)
382 return config | PERF_COUNTER_RAW_MASK;
383
384 if (sscanf(str, "%d:%llu", &type, &id) == 2)
385 return EID(type, id);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800386
387 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
388 if (!strncmp(str, event_symbols[i].symbol,
389 strlen(event_symbols[i].symbol)))
390 return event_symbols[i].event;
391 }
392
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100393 return ~0ULL;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800394}
395
396static int parse_events(char *str)
397{
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100398 __u64 config;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800399
400again:
401 if (nr_counters == MAX_COUNTERS)
402 return -1;
403
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100404 config = match_event_symbols(str);
405 if (config == ~0ULL)
406 return -1;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800407
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100408 event_id[nr_counters] = config;
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800409 nr_counters++;
410
411 str = strstr(str, ",");
412 if (str) {
413 str++;
414 goto again;
415 }
416
417 return 0;
418}
419
420
421/*
422 * perfstat
423 */
424
425char fault_here[1000000];
426
427static void create_perfstat_counter(int counter)
428{
429 struct perf_counter_hw_event hw_event;
430
431 memset(&hw_event, 0, sizeof(hw_event));
Peter Zijlstra803d4f32009-03-23 18:22:11 +0100432 hw_event.config = event_id[counter];
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800433 hw_event.record_type = PERF_RECORD_SIMPLE;
434 hw_event.nmi = 0;
435
436 if (system_wide) {
437 int cpu;
438 for (cpu = 0; cpu < nr_cpus; cpu ++) {
439 fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
440 if (fd[cpu][counter] < 0) {
441 printf("perfstat error: syscall returned with %d (%s)\n",
442 fd[cpu][counter], strerror(errno));
443 exit(-1);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100444 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800445 }
446 } else {
447 hw_event.inherit = 1;
448 hw_event.disabled = 1;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100449
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800450 fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
451 if (fd[0][counter] < 0) {
452 printf("perfstat error: syscall returned with %d (%s)\n",
453 fd[0][counter], strerror(errno));
454 exit(-1);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100455 }
456 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800457}
Ingo Molnare0143ba2009-03-23 21:29:59 +0100458
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800459int do_perfstat(int argc, char *argv[])
460{
461 unsigned long long t0, t1;
462 int counter;
463 ssize_t res;
464 int status;
465 int pid;
466
467 if (!system_wide)
468 nr_cpus = 1;
469
470 for (counter = 0; counter < nr_counters; counter++)
471 create_perfstat_counter(counter);
472
473 argc -= optind;
474 argv += optind;
475
Wu Fengguangaf9522c2009-03-20 10:08:10 +0800476 if (!argc)
477 display_help();
478
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800479 /*
480 * Enable counters and exec the command:
481 */
482 t0 = rdclock();
483 prctl(PR_TASK_PERF_COUNTERS_ENABLE);
484
485 if ((pid = fork()) < 0)
486 perror("failed to fork");
487 if (!pid) {
488 if (execvp(argv[0], argv)) {
489 perror(argv[0]);
490 exit(-1);
491 }
Wu Fengguang95bb3be2009-03-20 10:08:04 +0800492 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800493 while (wait(&status) >= 0)
494 ;
495 prctl(PR_TASK_PERF_COUNTERS_DISABLE);
496 t1 = rdclock();
497
498 fflush(stdout);
499
500 fprintf(stderr, "\n");
501 fprintf(stderr, " Performance counter stats for \'%s\':\n",
502 argv[0]);
503 fprintf(stderr, "\n");
Ingo Molnare0143ba2009-03-23 21:29:59 +0100504
505 for (counter = 0; counter < nr_counters; counter++) {
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800506 int cpu;
507 __u64 count, single_count;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100508
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800509 count = 0;
510 for (cpu = 0; cpu < nr_cpus; cpu ++) {
511 res = read(fd[cpu][counter],
512 (char *) &single_count, sizeof(single_count));
513 assert(res == sizeof(single_count));
514 count += single_count;
515 }
516
Paul Mackerrascbe46552009-03-24 16:52:34 +1100517 if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
518 event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800519
520 double msecs = (double)count / 1000000;
521
522 fprintf(stderr, " %14.6f %-20s (msecs)\n",
523 msecs, event_name(counter));
524 } else {
525 fprintf(stderr, " %14Ld %-20s (events)\n",
526 count, event_name(counter));
527 }
Ingo Molnare0143ba2009-03-23 21:29:59 +0100528 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800529 fprintf(stderr, "\n");
530 fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
531 (double)(t1-t0)/1e6);
532 fprintf(stderr, "\n");
533
534 return 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100535}
536
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800537/*
538 * Symbols
539 */
540
Ingo Molnare0143ba2009-03-23 21:29:59 +0100541static uint64_t min_ip;
542static uint64_t max_ip = -1ll;
543
544struct sym_entry {
545 unsigned long long addr;
546 char *sym;
547 unsigned long count[MAX_COUNTERS];
548 int skip;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100549 struct source_line *source;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100550};
551
552#define MAX_SYMS 100000
553
554static int sym_table_count;
555
556struct sym_entry *sym_filter_entry;
557
558static struct sym_entry sym_table[MAX_SYMS];
559
560static void show_details(struct sym_entry *sym);
561
562/*
Wu Fengguangef45fa92009-03-20 10:08:07 +0800563 * Ordering weight: count-1 * count-2 * ... / count-n
Ingo Molnare0143ba2009-03-23 21:29:59 +0100564 */
565static double sym_weight(const struct sym_entry *sym)
566{
567 double weight;
568 int counter;
569
570 weight = sym->count[0];
571
572 for (counter = 1; counter < nr_counters-1; counter++)
573 weight *= sym->count[counter];
574
575 weight /= (sym->count[counter] + 1);
576
577 return weight;
578}
579
/*
 * qsort() comparator ordering sym_entry records by descending
 * sym_weight().
 *
 * Returns a negative value when sym1 is heavier (sorts first), a
 * positive value when sym2 is heavier, and 0 on a tie - qsort()
 * requires a consistent three-way result, a plain 0/1 "less than" is
 * not a valid ordering.
 */
static int compare(const void *__sym1, const void *__sym2)
{
	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
	double w1 = sym_weight(sym1);
	double w2 = sym_weight(sym2);

	if (w1 < w2)
		return 1;
	if (w1 > w2)
		return -1;
	return 0;
}
586
587static time_t last_refresh;
588static long events;
589static long userspace_events;
590static const char CONSOLE_CLEAR[] = "";
591
592static struct sym_entry tmp[MAX_SYMS];
593
594static void print_sym_table(void)
595{
596 int i, printed;
597 int counter;
598 float events_per_sec = events/delay_secs;
599 float kevents_per_sec = (events-userspace_events)/delay_secs;
600
601 memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count);
602 qsort(tmp, sym_table_count, sizeof(tmp[0]), compare);
603
604 write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
605
606 printf(
607"------------------------------------------------------------------------------\n");
608 printf( " KernelTop:%8.0f irqs/sec kernel:%3.1f%% [%s, ",
609 events_per_sec,
610 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)),
611 nmi ? "NMI" : "IRQ");
612
613 if (nr_counters == 1)
614 printf("%d ", event_count[0]);
615
616 for (counter = 0; counter < nr_counters; counter++) {
617 if (counter)
618 printf("/");
619
Wu Fengguange3908612009-03-20 10:08:05 +0800620 printf("%s", event_name(counter));
Ingo Molnare0143ba2009-03-23 21:29:59 +0100621 }
622
623 printf( "], ");
624
625 if (tid != -1)
626 printf(" (tid: %d", tid);
627 else
628 printf(" (all");
629
630 if (profile_cpu != -1)
631 printf(", cpu: %d)\n", profile_cpu);
632 else {
633 if (tid != -1)
634 printf(")\n");
635 else
636 printf(", %d CPUs)\n", nr_cpus);
637 }
638
639 printf("------------------------------------------------------------------------------\n\n");
640
641 if (nr_counters == 1)
642 printf(" events");
643 else
644 printf(" weight events");
645
646 printf(" RIP kernel function\n"
647 " ______ ______ ________________ _______________\n\n"
648 );
649
650 printed = 0;
651 for (i = 0; i < sym_table_count; i++) {
652 int count;
653
654 if (nr_counters == 1) {
655 if (printed <= 18 &&
656 tmp[i].count[0] >= count_filter) {
657 printf("%19.2f - %016llx : %s\n",
658 sym_weight(tmp + i), tmp[i].addr, tmp[i].sym);
659 printed++;
660 }
661 } else {
662 if (printed <= 18 &&
663 tmp[i].count[0] >= count_filter) {
664 printf("%8.1f %10ld - %016llx : %s\n",
665 sym_weight(tmp + i),
666 tmp[i].count[0],
667 tmp[i].addr, tmp[i].sym);
668 printed++;
669 }
670 }
671 /*
672 * Add decay to the counts:
673 */
674 for (count = 0; count < nr_counters; count++)
675 sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8;
676 }
677
678 if (sym_filter_entry)
679 show_details(sym_filter_entry);
680
681 last_refresh = time(NULL);
682
683 {
684 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
685
686 if (poll(&stdin_poll, 1, 0) == 1) {
687 printf("key pressed - exiting.\n");
688 exit(0);
689 }
690 }
691}
692
693static int read_symbol(FILE *in, struct sym_entry *s)
694{
695 static int filter_match = 0;
696 char *sym, stype;
697 char str[500];
698 int rc, pos;
699
700 rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str);
701 if (rc == EOF)
702 return -1;
703
704 assert(rc == 3);
705
706 /* skip until end of line: */
707 pos = strlen(str);
708 do {
709 rc = fgetc(in);
710 if (rc == '\n' || rc == EOF || pos >= 499)
711 break;
712 str[pos] = rc;
713 pos++;
714 } while (1);
715 str[pos] = 0;
716
717 sym = str;
718
719 /* Filter out known duplicates and non-text symbols. */
720 if (!strcmp(sym, "_text"))
721 return 1;
722 if (!min_ip && !strcmp(sym, "_stext"))
723 return 1;
724 if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext"))
725 return 1;
726 if (stype != 'T' && stype != 't')
727 return 1;
728 if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14))
729 return 1;
730 if (strstr(sym, "_text_start") || strstr(sym, "_text_end"))
731 return 1;
732
733 s->sym = malloc(strlen(str));
734 assert(s->sym);
735
736 strcpy((char *)s->sym, str);
737 s->skip = 0;
738
739 /* Tag events to be skipped. */
740 if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
741 s->skip = 1;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +0100742 else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
743 s->skip = 1;
744 else if (!strcmp("mwait_idle", s->sym))
Ingo Molnare0143ba2009-03-23 21:29:59 +0100745 s->skip = 1;
746
747 if (filter_match == 1) {
748 filter_end = s->addr;
749 filter_match = -1;
750 if (filter_end - filter_start > 10000) {
751 printf("hm, too large filter symbol <%s> - skipping.\n",
752 sym_filter);
753 printf("symbol filter start: %016lx\n", filter_start);
754 printf(" end: %016lx\n", filter_end);
755 filter_end = filter_start = 0;
756 sym_filter = NULL;
757 sleep(1);
758 }
759 }
760 if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) {
761 filter_match = 1;
762 filter_start = s->addr;
763 }
764
765 return 0;
766}
767
768int compare_addr(const void *__sym1, const void *__sym2)
769{
770 const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
771
772 return sym1->addr > sym2->addr;
773}
774
775static void sort_symbol_table(void)
776{
777 int i, dups;
778
779 do {
780 qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr);
781 for (i = 0, dups = 0; i < sym_table_count; i++) {
782 if (sym_table[i].addr == sym_table[i+1].addr) {
783 sym_table[i+1].addr = -1ll;
784 dups++;
785 }
786 }
787 sym_table_count -= dups;
788 } while(dups);
789}
790
791static void parse_symbols(void)
792{
793 struct sym_entry *last;
794
795 FILE *kallsyms = fopen("/proc/kallsyms", "r");
796
797 if (!kallsyms) {
798 printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n");
799 exit(-1);
800 }
801
802 while (!feof(kallsyms)) {
803 if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) {
804 sym_table_count++;
805 assert(sym_table_count <= MAX_SYMS);
806 }
807 }
808
809 sort_symbol_table();
810 min_ip = sym_table[0].addr;
811 max_ip = sym_table[sym_table_count-1].addr;
812 last = sym_table + sym_table_count++;
813
814 last->addr = -1ll;
815 last->sym = "<end>";
816
817 if (filter_end) {
818 int count;
819 for (count=0; count < sym_table_count; count ++) {
820 if (!strcmp(sym_table[count].sym, sym_filter)) {
821 sym_filter_entry = &sym_table[count];
822 break;
823 }
824 }
825 }
826 if (dump_symtab) {
827 int i;
828
829 for (i = 0; i < sym_table_count; i++)
830 fprintf(stderr, "%llx %s\n",
831 sym_table[i].addr, sym_table[i].sym);
832 }
833}
834
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800835/*
836 * Source lines
837 */
Ingo Molnare0143ba2009-03-23 21:29:59 +0100838
839static void parse_vmlinux(char *filename)
840{
841 FILE *file;
842 char command[PATH_MAX*2];
843 if (!filename)
844 return;
845
846 sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename);
847
848 file = popen(command, "r");
849 if (!file)
850 return;
851
Paul Mackerrascbe46552009-03-24 16:52:34 +1100852 lines_tail = &lines;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100853 while (!feof(file)) {
854 struct source_line *src;
855 size_t dummy = 0;
856 char *c;
857
858 src = malloc(sizeof(struct source_line));
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800859 assert(src != NULL);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100860 memset(src, 0, sizeof(struct source_line));
861
862 if (getline(&src->line, &dummy, file) < 0)
863 break;
864 if (!src->line)
865 break;
866
867 c = strchr(src->line, '\n');
868 if (c)
869 *c = 0;
870
Paul Mackerrascbe46552009-03-24 16:52:34 +1100871 src->next = NULL;
872 *lines_tail = src;
873 lines_tail = &src->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100874
875 if (strlen(src->line)>8 && src->line[8] == ':')
876 src->EIP = strtoull(src->line, NULL, 16);
877 if (strlen(src->line)>8 && src->line[16] == ':')
878 src->EIP = strtoull(src->line, NULL, 16);
879 }
880 pclose(file);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100881}
882
883static void record_precise_ip(uint64_t ip)
884{
885 struct source_line *line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100886
Paul Mackerrascbe46552009-03-24 16:52:34 +1100887 for (line = lines; line; line = line->next) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100888 if (line->EIP == ip)
889 line->count++;
890 if (line->EIP > ip)
891 break;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100892 }
893}
894
895static void lookup_sym_in_vmlinux(struct sym_entry *sym)
896{
897 struct source_line *line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100898 char pattern[PATH_MAX];
899 sprintf(pattern, "<%s>:", sym->sym);
900
Paul Mackerrascbe46552009-03-24 16:52:34 +1100901 for (line = lines; line; line = line->next) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100902 if (strstr(line->line, pattern)) {
Paul Mackerrascbe46552009-03-24 16:52:34 +1100903 sym->source = line;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100904 break;
905 }
Ingo Molnare0143ba2009-03-23 21:29:59 +0100906 }
907}
908
Paul Mackerrascbe46552009-03-24 16:52:34 +1100909static void show_lines(struct source_line *line_queue, int line_queue_count)
Ingo Molnare0143ba2009-03-23 21:29:59 +0100910{
911 int i;
912 struct source_line *line;
913
Paul Mackerrascbe46552009-03-24 16:52:34 +1100914 line = line_queue;
915 for (i = 0; i < line_queue_count; i++) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100916 printf("%8li\t%s\n", line->count, line->line);
Paul Mackerrascbe46552009-03-24 16:52:34 +1100917 line = line->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100918 }
919}
920
921#define TRACE_COUNT 3
922
923static void show_details(struct sym_entry *sym)
924{
925 struct source_line *line;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100926 struct source_line *line_queue = NULL;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100927 int displayed = 0;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100928 int line_queue_count = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100929
930 if (!sym->source)
931 lookup_sym_in_vmlinux(sym);
932 if (!sym->source)
933 return;
934
935 printf("Showing details for %s\n", sym->sym);
936
Paul Mackerrascbe46552009-03-24 16:52:34 +1100937 line = sym->source;
938 while (line) {
Ingo Molnare0143ba2009-03-23 21:29:59 +0100939 if (displayed && strstr(line->line, ">:"))
940 break;
941
Paul Mackerrascbe46552009-03-24 16:52:34 +1100942 if (!line_queue_count)
943 line_queue = line;
944 line_queue_count ++;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100945
946 if (line->count >= count_filter) {
Paul Mackerrascbe46552009-03-24 16:52:34 +1100947 show_lines(line_queue, line_queue_count);
948 line_queue_count = 0;
949 line_queue = NULL;
950 } else if (line_queue_count > TRACE_COUNT) {
951 line_queue = line_queue->next;
952 line_queue_count --;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100953 }
954
955 line->count = 0;
956 displayed++;
957 if (displayed > 300)
958 break;
Paul Mackerrascbe46552009-03-24 16:52:34 +1100959 line = line->next;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100960 }
961}
962
963/*
964 * Binary search in the histogram table and record the hit:
965 */
966static void record_ip(uint64_t ip, int counter)
967{
968 int left_idx, middle_idx, right_idx, idx;
969 unsigned long left, middle, right;
970
971 record_precise_ip(ip);
972
973 left_idx = 0;
974 right_idx = sym_table_count-1;
975 assert(ip <= max_ip && ip >= min_ip);
976
977 while (left_idx + 1 < right_idx) {
978 middle_idx = (left_idx + right_idx) / 2;
979
980 left = sym_table[ left_idx].addr;
981 middle = sym_table[middle_idx].addr;
982 right = sym_table[ right_idx].addr;
983
984 if (!(left <= middle && middle <= right)) {
985 printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right);
986 printf("%d %d %d\n", left_idx, middle_idx, right_idx);
987 }
988 assert(left <= middle && middle <= right);
989 if (!(left <= ip && ip <= right)) {
990 printf(" left: %016lx\n", left);
Ingo Molnar193e8df2009-03-23 22:23:16 +0100991 printf(" ip: %016lx\n", (unsigned long)ip);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100992 printf("right: %016lx\n", right);
993 }
994 assert(left <= ip && ip <= right);
995 /*
996 * [ left .... target .... middle .... right ]
997 * => right := middle
998 */
999 if (ip < middle) {
1000 right_idx = middle_idx;
1001 continue;
1002 }
1003 /*
1004 * [ left .... middle ... target ... right ]
1005 * => left := middle
1006 */
1007 left_idx = middle_idx;
1008 }
1009
1010 idx = left_idx;
1011
1012 if (!sym_table[idx].skip)
1013 sym_table[idx].count[counter]++;
1014 else events--;
1015}
1016
1017static void process_event(uint64_t ip, int counter)
1018{
1019 events++;
1020
1021 if (ip < min_ip || ip > max_ip) {
1022 userspace_events++;
1023 return;
1024 }
1025
1026 record_ip(ip, counter);
1027}
1028
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001029static void process_options(int argc, char *argv[])
1030{
1031 int error = 0, counter;
1032
1033 if (strstr(argv[0], "perfstat"))
1034 run_perfstat = 1;
1035
1036 for (;;) {
1037 int option_index = 0;
1038 /** Options for getopt */
1039 static struct option long_options[] = {
1040 {"count", required_argument, NULL, 'c'},
1041 {"cpu", required_argument, NULL, 'C'},
1042 {"delay", required_argument, NULL, 'd'},
1043 {"dump_symtab", no_argument, NULL, 'D'},
1044 {"event", required_argument, NULL, 'e'},
1045 {"filter", required_argument, NULL, 'f'},
1046 {"group", required_argument, NULL, 'g'},
1047 {"help", no_argument, NULL, 'h'},
1048 {"nmi", required_argument, NULL, 'n'},
1049 {"pid", required_argument, NULL, 'p'},
1050 {"vmlinux", required_argument, NULL, 'x'},
1051 {"symbol", required_argument, NULL, 's'},
1052 {"stat", no_argument, NULL, 'S'},
1053 {"zero", no_argument, NULL, 'z'},
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001054 {"mmap_pages", required_argument, NULL, 'm'},
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001055 {NULL, 0, NULL, 0 }
1056 };
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001057 int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:m:p:s:Sx:z",
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001058 long_options, &option_index);
1059 if (c == -1)
1060 break;
1061
1062 switch (c) {
1063 case 'a': system_wide = 1; break;
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001064 case 'c': default_interval = atoi(optarg); break;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001065 case 'C':
1066 /* CPU and PID are mutually exclusive */
1067 if (tid != -1) {
1068 printf("WARNING: CPU switch overriding PID\n");
1069 sleep(1);
1070 tid = -1;
1071 }
1072 profile_cpu = atoi(optarg); break;
1073 case 'd': delay_secs = atoi(optarg); break;
1074 case 'D': dump_symtab = 1; break;
1075
1076 case 'e': error = parse_events(optarg); break;
1077
1078 case 'f': count_filter = atoi(optarg); break;
1079 case 'g': group = atoi(optarg); break;
1080 case 'h': display_help(); break;
1081 case 'n': nmi = atoi(optarg); break;
1082 case 'p':
1083 /* CPU and PID are mutually exclusive */
1084 if (profile_cpu != -1) {
1085 printf("WARNING: PID switch overriding CPU\n");
1086 sleep(1);
1087 profile_cpu = -1;
1088 }
1089 tid = atoi(optarg); break;
1090 case 's': sym_filter = strdup(optarg); break;
1091 case 'S': run_perfstat = 1; break;
1092 case 'x': vmlinux = strdup(optarg); break;
1093 case 'z': zero = 1; break;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001094 case 'm': mmap_pages = atoi(optarg); break;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001095 default: error = 1; break;
1096 }
1097 }
1098 if (error)
1099 display_help();
1100
1101 if (!nr_counters) {
1102 if (run_perfstat)
1103 nr_counters = 8;
1104 else {
1105 nr_counters = 1;
1106 event_id[0] = 0;
1107 }
1108 }
1109
1110 for (counter = 0; counter < nr_counters; counter++) {
1111 if (event_count[counter])
1112 continue;
1113
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001114 event_count[counter] = default_interval;
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001115 }
1116}
1117
/*
 * Reader-side state for one mmap()ed counter buffer.
 */
struct mmap_data {
	int		counter;	/* counter index passed on to process_event() */
	void		*base;		/* start of the mapping; event data begins one page in */
	unsigned int	mask;		/* AND-mask that wraps a running offset into the data area */
	unsigned int	prev;		/* running offset up to which events were consumed */
};
1124
1125static unsigned int mmap_read_head(struct mmap_data *md)
1126{
1127 struct perf_counter_mmap_page *pc = md->base;
1128 unsigned int seq, head;
1129
1130repeat:
1131 rmb();
1132 seq = pc->lock;
1133
1134 if (unlikely(seq & 1)) {
1135 cpu_relax();
1136 goto repeat;
1137 }
1138
1139 head = pc->data_head;
1140
1141 rmb();
1142 if (pc->lock != seq)
1143 goto repeat;
1144
1145 return head;
1146}
1147
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001148struct timeval last_read, this_read;
1149
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001150static void mmap_read(struct mmap_data *md)
1151{
1152 unsigned int head = mmap_read_head(md);
1153 unsigned int old = md->prev;
1154 unsigned char *data = md->base + page_size;
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001155 int diff;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001156
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001157 gettimeofday(&this_read, NULL);
1158
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001159 /*
1160 * If we're further behind than half the buffer, there's a chance
1161 * the writer will bite our tail and screw up the events under us.
1162 *
1163 * If we somehow ended up ahead of the head, we got messed up.
1164 *
1165 * In either case, truncate and restart at head.
1166 */
1167 diff = head - old;
1168 if (diff > md->mask / 2 || diff < 0) {
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001169 struct timeval iv;
1170 unsigned long msecs;
1171
1172 timersub(&this_read, &last_read, &iv);
1173 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
1174
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001175 fprintf(stderr, "WARNING: failed to keep up with mmap data."
1176 " Last read %lu msecs ago.\n", msecs);
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001177
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001178 /*
1179 * head points to a known good entry, start there.
1180 */
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001181 old = head;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001182 }
1183
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001184 last_read = this_read;
1185
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001186 for (; old != head;) {
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001187 struct event_struct {
1188 struct perf_event_header header;
1189 __u64 ip;
1190 __u32 pid, tid;
1191 } *event = (struct event_struct *)&data[old & md->mask];
1192 struct event_struct event_copy;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001193
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001194 unsigned int size = event->header.size;
1195
1196 /*
1197 * Event straddles the mmap boundary -- header should always
1198 * be inside due to u64 alignment of output.
1199 */
1200 if ((old & md->mask) + size != ((old + size) & md->mask)) {
1201 unsigned int offset = old;
1202 unsigned int len = sizeof(*event), cpy;
1203 void *dst = &event_copy;
1204
1205 do {
1206 cpy = min(md->mask + 1 - (offset & md->mask), len);
1207 memcpy(dst, &data[offset & md->mask], cpy);
1208 offset += cpy;
1209 dst += cpy;
1210 len -= cpy;
1211 } while (len);
1212
1213 event = &event_copy;
1214 }
1215
1216 old += size;
1217
1218 switch (event->header.type) {
1219 case PERF_EVENT_IP:
1220 case PERF_EVENT_IP | __PERF_EVENT_TID:
1221 process_event(event->ip, md->counter);
1222 break;
1223 }
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001224 }
1225
1226 md->prev = old;
1227}
1228
Ingo Molnare0143ba2009-03-23 21:29:59 +01001229int main(int argc, char *argv[])
1230{
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001231 struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001232 struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
Ingo Molnare0143ba2009-03-23 21:29:59 +01001233 struct perf_counter_hw_event hw_event;
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001234 int i, counter, group_fd, nr_poll = 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001235 unsigned int cpu;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001236 int ret;
1237
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001238 page_size = sysconf(_SC_PAGE_SIZE);
1239
Ingo Molnare0143ba2009-03-23 21:29:59 +01001240 process_options(argc, argv);
1241
1242 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001243 assert(nr_cpus <= MAX_NR_CPUS);
1244 assert(nr_cpus >= 0);
1245
1246 if (run_perfstat)
1247 return do_perfstat(argc, argv);
1248
Ingo Molnare0143ba2009-03-23 21:29:59 +01001249 if (tid != -1 || profile_cpu != -1)
1250 nr_cpus = 1;
1251
Paul Mackerrascbe46552009-03-24 16:52:34 +11001252 parse_symbols();
1253 if (vmlinux && sym_filter_entry)
1254 parse_vmlinux(vmlinux);
1255
Ingo Molnare0143ba2009-03-23 21:29:59 +01001256 for (i = 0; i < nr_cpus; i++) {
1257 group_fd = -1;
1258 for (counter = 0; counter < nr_counters; counter++) {
1259
1260 cpu = profile_cpu;
1261 if (tid == -1 && profile_cpu == -1)
1262 cpu = i;
1263
1264 memset(&hw_event, 0, sizeof(hw_event));
Peter Zijlstra803d4f32009-03-23 18:22:11 +01001265 hw_event.config = event_id[counter];
Ingo Molnare0143ba2009-03-23 21:29:59 +01001266 hw_event.irq_period = event_count[counter];
1267 hw_event.record_type = PERF_RECORD_IRQ;
1268 hw_event.nmi = nmi;
Peter Zijlstra00f0ad72009-03-25 12:30:27 +01001269 hw_event.include_tid = 1;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001270
Ingo Molnare0143ba2009-03-23 21:29:59 +01001271 fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001272 if (fd[i][counter] < 0) {
Paul Mackerrascbe46552009-03-24 16:52:34 +11001273 int err = errno;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001274 printf("kerneltop error: syscall returned with %d (%s)\n",
Paul Mackerrascbe46552009-03-24 16:52:34 +11001275 fd[i][counter], strerror(err));
1276 if (err == EPERM)
Ingo Molnare0143ba2009-03-23 21:29:59 +01001277 printf("Are you root?\n");
1278 exit(-1);
1279 }
1280 assert(fd[i][counter] >= 0);
Paul Mackerrascbe46552009-03-24 16:52:34 +11001281 fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001282
1283 /*
1284 * First counter acts as the group leader:
1285 */
1286 if (group && group_fd == -1)
1287 group_fd = fd[i][counter];
1288
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001289 event_array[nr_poll].fd = fd[i][counter];
1290 event_array[nr_poll].events = POLLIN;
1291 nr_poll++;
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001292
1293 mmap_array[i][counter].counter = counter;
1294 mmap_array[i][counter].prev = 0;
Peter Zijlstra4c4ba212009-03-25 12:30:26 +01001295 mmap_array[i][counter].mask = mmap_pages*page_size - 1;
1296 mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001297 PROT_READ, MAP_SHARED, fd[i][counter], 0);
1298 if (mmap_array[i][counter].base == MAP_FAILED) {
1299 printf("kerneltop error: failed to mmap with %d (%s)\n",
1300 errno, strerror(errno));
1301 exit(-1);
1302 }
Ingo Molnare0143ba2009-03-23 21:29:59 +01001303 }
1304 }
1305
Ingo Molnare0143ba2009-03-23 21:29:59 +01001306 printf("KernelTop refresh period: %d seconds\n", delay_secs);
1307 last_refresh = time(NULL);
1308
1309 while (1) {
1310 int hits = events;
1311
1312 for (i = 0; i < nr_cpus; i++) {
Peter Zijlstrabcbcb372009-03-23 18:22:12 +01001313 for (counter = 0; counter < nr_counters; counter++)
1314 mmap_read(&mmap_array[i][counter]);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001315 }
1316
1317 if (time(NULL) >= last_refresh + delay_secs) {
1318 print_sym_table();
1319 events = userspace_events = 0;
1320 }
1321
1322 if (hits == events)
Peter Zijlstra0fd112e2009-03-24 10:50:24 +01001323 ret = poll(event_array, nr_poll, 1000);
Ingo Molnare0143ba2009-03-23 21:29:59 +01001324 hits = events;
1325 }
1326
1327 return 0;
1328}