blob: 9db65a4f104263444337a341090ac9914c3b506d [file] [log] [blame]
Ingo Molnare0143ba2009-03-23 21:29:59 +01001/*
2 * kerneltop.c: show top kernel functions - performance counters showcase
3
4 Build with:
5
Wu Fengguangf7524bd2009-03-20 10:08:06 +08006 cc -O6 -Wall -lrt `pkg-config --cflags --libs glib-2.0` -o kerneltop kerneltop.c
Ingo Molnare0143ba2009-03-23 21:29:59 +01007
8 Sample output:
9
10------------------------------------------------------------------------------
11 KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2)
12------------------------------------------------------------------------------
13
14 weight RIP kernel function
15 ______ ________________ _______________
16
17 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
18 33.00 - ffffffff804cb740 : sock_alloc_send_skb
19 31.26 - ffffffff804ce808 : skb_push
20 22.43 - ffffffff80510004 : tcp_established_options
21 19.00 - ffffffff8027d250 : find_get_page
22 15.76 - ffffffff804e4fc9 : eth_type_trans
23 15.20 - ffffffff804d8baa : dst_release
24 14.86 - ffffffff804cf5d8 : skb_release_head_state
25 14.00 - ffffffff802217d5 : read_hpet
26 12.00 - ffffffff804ffb7f : __ip_local_out
27 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
28 8.54 - ffffffff805001a3 : ip_queue_xmit
Ingo Molnare0143ba2009-03-23 21:29:59 +010029 */
Wu Fengguangf7524bd2009-03-20 10:08:06 +080030
31/*
32 * perfstat: /usr/bin/time -alike performance counter statistics utility
33
34 It summarizes the counter events of all tasks (and child tasks),
35 covering all CPUs that the command (or workload) executes on.
36 It only counts the per-task events of the workload started,
37 independent of how many other tasks run on those CPUs.
38
39 Sample output:
40
41 $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
42
43 Performance counter stats for 'ls':
44
45 163516953 instructions
46 2295 cache-misses
47 2855182 branch-misses
48 */
49
50 /*
51 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
52 *
53 * Improvements and fixes by:
54 *
55 * Arjan van de Ven <arjan@linux.intel.com>
56 * Yanmin Zhang <yanmin.zhang@intel.com>
57 * Wu Fengguang <fengguang.wu@intel.com>
58 * Mike Galbraith <efault@gmx.de>
59 *
60 * Released under the GPL v2. (and only v2, not any later version)
61 */
62
Ingo Molnare0143ba2009-03-23 21:29:59 +010063#define _GNU_SOURCE
64#include <sys/types.h>
65#include <sys/stat.h>
66#include <sys/time.h>
67#include <unistd.h>
68#include <stdint.h>
69#include <stdlib.h>
70#include <string.h>
71#include <getopt.h>
72#include <assert.h>
73#include <fcntl.h>
74#include <stdio.h>
75#include <errno.h>
76#include <ctype.h>
77#include <time.h>
78
79#include <glib.h>
80
81#include <sys/syscall.h>
82#include <sys/ioctl.h>
83#include <sys/poll.h>
84#include <sys/prctl.h>
85#include <sys/wait.h>
86#include <sys/uio.h>
87
88#include <linux/unistd.h>
89
Wu Fengguangcea92ce2009-03-20 10:08:02 +080090#include "perfcounters.h"
Ingo Molnare0143ba2009-03-23 21:29:59 +010091
Wu Fengguangf7524bd2009-03-20 10:08:06 +080092
93#define MAX_COUNTERS 64
94#define MAX_NR_CPUS 256
95
96#define DEF_PERFSTAT_EVENTS { -2, -5, -4, -3, 0, 1, 2, 3}
97
98static int run_perfstat = 0;
99static int system_wide = 0;
100
101static int nr_counters = 0;
102static long event_id[MAX_COUNTERS] = DEF_PERFSTAT_EVENTS;
103static int event_raw[MAX_COUNTERS];
104static int event_count[MAX_COUNTERS];
105static int fd[MAX_NR_CPUS][MAX_COUNTERS];
Ingo Molnare0143ba2009-03-23 21:29:59 +0100106
Ingo Molnare0143ba2009-03-23 21:29:59 +0100107static __u64 count_filter = 100;
108
Ingo Molnare0143ba2009-03-23 21:29:59 +0100109static int tid = -1;
110static int profile_cpu = -1;
111static int nr_cpus = 0;
112static int nmi = 1;
113static int group = 0;
114
115static char *vmlinux;
116
117static char *sym_filter;
118static unsigned long filter_start;
119static unsigned long filter_end;
120
121static int delay_secs = 2;
122static int zero;
123static int dump_symtab;
124
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800125static GList *lines;
126
/* One line of disassembly/source output for the filtered symbol. */
struct source_line {
	uint64_t	EIP;	/* address parsed from the line, 0 when it has none */
	unsigned long	count;	/* samples recorded at exactly this address */
	char		*line;	/* text of the line, trailing newline removed */
};
132
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800133
/*
 * Default sampling period for the hardware events, indexed by event id
 * (the same index that selects an entry of hw_event_names[]).
 */
const unsigned int default_count[] = {
	[0] = 1000000,
	[1] = 1000000,
	[2] =   10000,
	[3] =   10000,
	[4] = 1000000,
	[5] =   10000,
};
142
/* Display names of the hardware events; event_name() indexes this by event id. */
static char *hw_event_names[] = {
	[0] = "CPU cycles",
	[1] = "instructions",
	[2] = "cache references",
	[3] = "cache misses",
	[4] = "branches",
	[5] = "branch misses",
	[6] = "bus cycles",
};
152
/*
 * Display names of the software events. Software event ids are negative;
 * event_name() maps id to slot (-id - 1).
 */
static char *sw_event_names[] = {
	[0] = "cpu clock ticks",
	[1] = "task clock ticks",
	[2] = "pagefaults",
	[3] = "context switches",
	[4] = "CPU migrations",
};
160
/* Maps one symbolic name accepted by -e to its event id. */
struct event_symbol {
	int	event;		/* event id */
	char	*symbol;	/* name as typed on the command line */
};
165
166static struct event_symbol event_symbols[] = {
167 {PERF_COUNT_CPU_CYCLES, "cpu-cycles", },
168 {PERF_COUNT_CPU_CYCLES, "cycles", },
169 {PERF_COUNT_INSTRUCTIONS, "instructions", },
170 {PERF_COUNT_CACHE_REFERENCES, "cache-references", },
171 {PERF_COUNT_CACHE_MISSES, "cache-misses", },
172 {PERF_COUNT_BRANCH_INSTRUCTIONS, "branch-instructions", },
173 {PERF_COUNT_BRANCH_INSTRUCTIONS, "branches", },
174 {PERF_COUNT_BRANCH_MISSES, "branch-misses", },
175 {PERF_COUNT_BUS_CYCLES, "bus-cycles", },
176 {PERF_COUNT_CPU_CLOCK, "cpu-ticks", },
177 {PERF_COUNT_CPU_CLOCK, "ticks", },
178 {PERF_COUNT_TASK_CLOCK, "task-ticks", },
179 {PERF_COUNT_PAGE_FAULTS, "page-faults", },
180 {PERF_COUNT_PAGE_FAULTS, "faults", },
181 {PERF_COUNT_CONTEXT_SWITCHES, "context-switches", },
182 {PERF_COUNT_CONTEXT_SWITCHES, "cs", },
183 {PERF_COUNT_CPU_MIGRATIONS, "cpu-migrations", },
184 {PERF_COUNT_CPU_MIGRATIONS, "migrations", },
185};
186
187static void display_events_help(void)
188{
189 unsigned int i;
190 int e;
191
192 printf(
193 " -e EVENT --event=EVENT # symbolic-name abbreviations");
194
195 for (i = 0, e = PERF_HW_EVENTS_MAX; i < ARRAY_SIZE(event_symbols); i++) {
196 if (e != event_symbols[i].event) {
197 e = event_symbols[i].event;
198 printf(
199 "\n %2d: %-20s", e, event_symbols[i].symbol);
200 } else
201 printf(" %s", event_symbols[i].symbol);
202 }
203
204 printf("\n"
205 " rNNN: raw PMU events (eventsel+umask)\n\n");
206}
207
208static void display_perfstat_help(void)
209{
210 printf(
211 "Usage: perfstat [<events...>] <cmd...>\n\n"
212 "PerfStat Options (up to %d event types can be specified):\n\n",
213 MAX_COUNTERS);
214
215 display_events_help();
216
217 printf(
218 " -a # system-wide collection\n");
219 exit(0);
220}
Ingo Molnare0143ba2009-03-23 21:29:59 +0100221
222static void display_help(void)
223{
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800224 if (run_perfstat)
225 return display_perfstat_help();
226
Ingo Molnare0143ba2009-03-23 21:29:59 +0100227 printf(
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800228 "Usage: kerneltop [<options>]\n"
229 " Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100230 "KernelTop Options (up to %d event types can be specified at once):\n\n",
231 MAX_COUNTERS);
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800232
233 display_events_help();
234
Ingo Molnare0143ba2009-03-23 21:29:59 +0100235 printf(
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800236 " -S --stat # perfstat COMMAND\n"
237 " -a # system-wide collection (for perfstat)\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100238 " -c CNT --count=CNT # event period to sample\n\n"
239 " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n"
240 " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n"
241 " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800242 " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100243 " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800244 " -x path --vmlinux=<path> # the vmlinux binary, required for -s use\n"
Ingo Molnare0143ba2009-03-23 21:29:59 +0100245 " -z --zero # zero counts after display\n"
246 " -D --dump_symtab # dump symbol table to stderr on startup\n"
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800247 );
Ingo Molnare0143ba2009-03-23 21:29:59 +0100248
249 exit(0);
250}
251
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800252static int type_valid(int type)
Ingo Molnare0143ba2009-03-23 21:29:59 +0100253{
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800254 if (type >= PERF_HW_EVENTS_MAX)
255 return 0;
256 if (type <= PERF_SW_EVENTS_MIN)
257 return 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100258
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800259 return 1;
260}
Ingo Molnare0143ba2009-03-23 21:29:59 +0100261
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800262static char *event_name(int ctr)
263{
264 int type = event_id[ctr];
265 static char buf[32];
266
267 if (event_raw[ctr]) {
268 sprintf(buf, "raw 0x%x", type);
269 return buf;
270 }
271 if (!type_valid(type))
272 return "unknown";
273
274 if (type >= 0)
275 return hw_event_names[type];
276
277 return sw_event_names[-type-1];
278}
279
280/*
281 * Each event can have multiple symbolic names.
282 * Symbolic names are (almost) exactly matched.
283 */
284static int match_event_symbols(char *str)
285{
286 unsigned int i;
287
288 if (isdigit(str[0]) || str[0] == '-')
289 return atoi(str);
290
291 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
292 if (!strncmp(str, event_symbols[i].symbol,
293 strlen(event_symbols[i].symbol)))
294 return event_symbols[i].event;
295 }
296
297 return PERF_HW_EVENTS_MAX;
298}
299
300static int parse_events(char *str)
301{
302 int type, raw;
303
304again:
305 if (nr_counters == MAX_COUNTERS)
306 return -1;
307
308 raw = 0;
309 if (*str == 'r') {
310 raw = 1;
311 ++str;
312 type = strtol(str, NULL, 16);
313 } else {
314 type = match_event_symbols(str);
315 if (!type_valid(type))
316 return -1;
317 }
318
319 event_id[nr_counters] = type;
320 event_raw[nr_counters] = raw;
321 nr_counters++;
322
323 str = strstr(str, ",");
324 if (str) {
325 str++;
326 goto again;
327 }
328
329 return 0;
330}
331
332
333/*
334 * perfstat
335 */
336
/*
 * NOTE(review): nothing in the visible source references this buffer;
 * confirm whether it is still needed before removing it.
 */
char fault_here[1000000];
338
339static void create_perfstat_counter(int counter)
340{
341 struct perf_counter_hw_event hw_event;
342
343 memset(&hw_event, 0, sizeof(hw_event));
344 hw_event.type = event_id[counter];
345 hw_event.raw = event_raw[counter];
346 hw_event.record_type = PERF_RECORD_SIMPLE;
347 hw_event.nmi = 0;
348
349 if (system_wide) {
350 int cpu;
351 for (cpu = 0; cpu < nr_cpus; cpu ++) {
352 fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
353 if (fd[cpu][counter] < 0) {
354 printf("perfstat error: syscall returned with %d (%s)\n",
355 fd[cpu][counter], strerror(errno));
356 exit(-1);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100357 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800358 }
359 } else {
360 hw_event.inherit = 1;
361 hw_event.disabled = 1;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100362
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800363 fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
364 if (fd[0][counter] < 0) {
365 printf("perfstat error: syscall returned with %d (%s)\n",
366 fd[0][counter], strerror(errno));
367 exit(-1);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100368 }
369 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800370}
Ingo Molnare0143ba2009-03-23 21:29:59 +0100371
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800372int do_perfstat(int argc, char *argv[])
373{
374 unsigned long long t0, t1;
375 int counter;
376 ssize_t res;
377 int status;
378 int pid;
379
380 if (!system_wide)
381 nr_cpus = 1;
382
383 for (counter = 0; counter < nr_counters; counter++)
384 create_perfstat_counter(counter);
385
386 argc -= optind;
387 argv += optind;
388
389 /*
390 * Enable counters and exec the command:
391 */
392 t0 = rdclock();
393 prctl(PR_TASK_PERF_COUNTERS_ENABLE);
394
395 if ((pid = fork()) < 0)
396 perror("failed to fork");
397 if (!pid) {
398 if (execvp(argv[0], argv)) {
399 perror(argv[0]);
400 exit(-1);
401 }
Wu Fengguang95bb3be2009-03-20 10:08:04 +0800402 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800403 while (wait(&status) >= 0)
404 ;
405 prctl(PR_TASK_PERF_COUNTERS_DISABLE);
406 t1 = rdclock();
407
408 fflush(stdout);
409
410 fprintf(stderr, "\n");
411 fprintf(stderr, " Performance counter stats for \'%s\':\n",
412 argv[0]);
413 fprintf(stderr, "\n");
Ingo Molnare0143ba2009-03-23 21:29:59 +0100414
415 for (counter = 0; counter < nr_counters; counter++) {
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800416 int cpu;
417 __u64 count, single_count;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100418
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800419 count = 0;
420 for (cpu = 0; cpu < nr_cpus; cpu ++) {
421 res = read(fd[cpu][counter],
422 (char *) &single_count, sizeof(single_count));
423 assert(res == sizeof(single_count));
424 count += single_count;
425 }
426
427 if (!event_raw[counter] &&
428 (event_id[counter] == PERF_COUNT_CPU_CLOCK ||
429 event_id[counter] == PERF_COUNT_TASK_CLOCK)) {
430
431 double msecs = (double)count / 1000000;
432
433 fprintf(stderr, " %14.6f %-20s (msecs)\n",
434 msecs, event_name(counter));
435 } else {
436 fprintf(stderr, " %14Ld %-20s (events)\n",
437 count, event_name(counter));
438 }
439 if (!counter)
440 fprintf(stderr, "\n");
Ingo Molnare0143ba2009-03-23 21:29:59 +0100441 }
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800442 fprintf(stderr, "\n");
443 fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
444 (double)(t1-t0)/1e6);
445 fprintf(stderr, "\n");
446
447 return 0;
Ingo Molnare0143ba2009-03-23 21:29:59 +0100448}
449
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800450/*
451 * Symbols
452 */
453
/*
 * Address range covered by the symbol table; parse_symbols() narrows it.
 * Samples outside [min_ip, max_ip] are accounted as userspace events.
 */
static uint64_t min_ip;
static uint64_t max_ip = UINT64_MAX;
456
457struct sym_entry {
458 unsigned long long addr;
459 char *sym;
460 unsigned long count[MAX_COUNTERS];
461 int skip;
462 GList *source;
463};
464
465#define MAX_SYMS 100000
466
467static int sym_table_count;
468
469struct sym_entry *sym_filter_entry;
470
471static struct sym_entry sym_table[MAX_SYMS];
472
473static void show_details(struct sym_entry *sym);
474
475/*
476 * Ordering weight: count-1 * count-1 * ... / count-n
477 */
478static double sym_weight(const struct sym_entry *sym)
479{
480 double weight;
481 int counter;
482
483 weight = sym->count[0];
484
485 for (counter = 1; counter < nr_counters-1; counter++)
486 weight *= sym->count[counter];
487
488 weight /= (sym->count[counter] + 1);
489
490 return weight;
491}
492
/*
 * qsort() comparator ordering sym_entry elements by descending weight.
 *
 * Returns a negative, zero or positive value as qsort() requires; a
 * plain "w1 < w2" result can only say "equal" or "greater", which makes
 * the ordering inconsistent when the arguments are swapped.
 */
static int compare(const void *__sym1, const void *__sym2)
{
	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
	double w1 = sym_weight(sym1);
	double w2 = sym_weight(sym2);

	/* heavier symbols sort first */
	return (w1 < w2) - (w1 > w2);
}
499
500static time_t last_refresh;
501static long events;
502static long userspace_events;
503static const char CONSOLE_CLEAR[] = "";
504
505static struct sym_entry tmp[MAX_SYMS];
506
507static void print_sym_table(void)
508{
509 int i, printed;
510 int counter;
511 float events_per_sec = events/delay_secs;
512 float kevents_per_sec = (events-userspace_events)/delay_secs;
513
514 memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count);
515 qsort(tmp, sym_table_count, sizeof(tmp[0]), compare);
516
517 write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
518
519 printf(
520"------------------------------------------------------------------------------\n");
521 printf( " KernelTop:%8.0f irqs/sec kernel:%3.1f%% [%s, ",
522 events_per_sec,
523 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)),
524 nmi ? "NMI" : "IRQ");
525
526 if (nr_counters == 1)
527 printf("%d ", event_count[0]);
528
529 for (counter = 0; counter < nr_counters; counter++) {
530 if (counter)
531 printf("/");
532
Wu Fengguange3908612009-03-20 10:08:05 +0800533 printf("%s", event_name(counter));
Ingo Molnare0143ba2009-03-23 21:29:59 +0100534 }
535
536 printf( "], ");
537
538 if (tid != -1)
539 printf(" (tid: %d", tid);
540 else
541 printf(" (all");
542
543 if (profile_cpu != -1)
544 printf(", cpu: %d)\n", profile_cpu);
545 else {
546 if (tid != -1)
547 printf(")\n");
548 else
549 printf(", %d CPUs)\n", nr_cpus);
550 }
551
552 printf("------------------------------------------------------------------------------\n\n");
553
554 if (nr_counters == 1)
555 printf(" events");
556 else
557 printf(" weight events");
558
559 printf(" RIP kernel function\n"
560 " ______ ______ ________________ _______________\n\n"
561 );
562
563 printed = 0;
564 for (i = 0; i < sym_table_count; i++) {
565 int count;
566
567 if (nr_counters == 1) {
568 if (printed <= 18 &&
569 tmp[i].count[0] >= count_filter) {
570 printf("%19.2f - %016llx : %s\n",
571 sym_weight(tmp + i), tmp[i].addr, tmp[i].sym);
572 printed++;
573 }
574 } else {
575 if (printed <= 18 &&
576 tmp[i].count[0] >= count_filter) {
577 printf("%8.1f %10ld - %016llx : %s\n",
578 sym_weight(tmp + i),
579 tmp[i].count[0],
580 tmp[i].addr, tmp[i].sym);
581 printed++;
582 }
583 }
584 /*
585 * Add decay to the counts:
586 */
587 for (count = 0; count < nr_counters; count++)
588 sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8;
589 }
590
591 if (sym_filter_entry)
592 show_details(sym_filter_entry);
593
594 last_refresh = time(NULL);
595
596 {
597 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
598
599 if (poll(&stdin_poll, 1, 0) == 1) {
600 printf("key pressed - exiting.\n");
601 exit(0);
602 }
603 }
604}
605
606static int read_symbol(FILE *in, struct sym_entry *s)
607{
608 static int filter_match = 0;
609 char *sym, stype;
610 char str[500];
611 int rc, pos;
612
613 rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str);
614 if (rc == EOF)
615 return -1;
616
617 assert(rc == 3);
618
619 /* skip until end of line: */
620 pos = strlen(str);
621 do {
622 rc = fgetc(in);
623 if (rc == '\n' || rc == EOF || pos >= 499)
624 break;
625 str[pos] = rc;
626 pos++;
627 } while (1);
628 str[pos] = 0;
629
630 sym = str;
631
632 /* Filter out known duplicates and non-text symbols. */
633 if (!strcmp(sym, "_text"))
634 return 1;
635 if (!min_ip && !strcmp(sym, "_stext"))
636 return 1;
637 if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext"))
638 return 1;
639 if (stype != 'T' && stype != 't')
640 return 1;
641 if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14))
642 return 1;
643 if (strstr(sym, "_text_start") || strstr(sym, "_text_end"))
644 return 1;
645
646 s->sym = malloc(strlen(str));
647 assert(s->sym);
648
649 strcpy((char *)s->sym, str);
650 s->skip = 0;
651
652 /* Tag events to be skipped. */
653 if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
654 s->skip = 1;
655 if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
656 s->skip = 1;
657
658 if (filter_match == 1) {
659 filter_end = s->addr;
660 filter_match = -1;
661 if (filter_end - filter_start > 10000) {
662 printf("hm, too large filter symbol <%s> - skipping.\n",
663 sym_filter);
664 printf("symbol filter start: %016lx\n", filter_start);
665 printf(" end: %016lx\n", filter_end);
666 filter_end = filter_start = 0;
667 sym_filter = NULL;
668 sleep(1);
669 }
670 }
671 if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) {
672 filter_match = 1;
673 filter_start = s->addr;
674 }
675
676 return 0;
677}
678
679int compare_addr(const void *__sym1, const void *__sym2)
680{
681 const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
682
683 return sym1->addr > sym2->addr;
684}
685
686static void sort_symbol_table(void)
687{
688 int i, dups;
689
690 do {
691 qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr);
692 for (i = 0, dups = 0; i < sym_table_count; i++) {
693 if (sym_table[i].addr == sym_table[i+1].addr) {
694 sym_table[i+1].addr = -1ll;
695 dups++;
696 }
697 }
698 sym_table_count -= dups;
699 } while(dups);
700}
701
702static void parse_symbols(void)
703{
704 struct sym_entry *last;
705
706 FILE *kallsyms = fopen("/proc/kallsyms", "r");
707
708 if (!kallsyms) {
709 printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n");
710 exit(-1);
711 }
712
713 while (!feof(kallsyms)) {
714 if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) {
715 sym_table_count++;
716 assert(sym_table_count <= MAX_SYMS);
717 }
718 }
719
720 sort_symbol_table();
721 min_ip = sym_table[0].addr;
722 max_ip = sym_table[sym_table_count-1].addr;
723 last = sym_table + sym_table_count++;
724
725 last->addr = -1ll;
726 last->sym = "<end>";
727
728 if (filter_end) {
729 int count;
730 for (count=0; count < sym_table_count; count ++) {
731 if (!strcmp(sym_table[count].sym, sym_filter)) {
732 sym_filter_entry = &sym_table[count];
733 break;
734 }
735 }
736 }
737 if (dump_symtab) {
738 int i;
739
740 for (i = 0; i < sym_table_count; i++)
741 fprintf(stderr, "%llx %s\n",
742 sym_table[i].addr, sym_table[i].sym);
743 }
744}
745
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800746/*
747 * Source lines
748 */
Ingo Molnare0143ba2009-03-23 21:29:59 +0100749
750static void parse_vmlinux(char *filename)
751{
752 FILE *file;
753 char command[PATH_MAX*2];
754 if (!filename)
755 return;
756
757 sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename);
758
759 file = popen(command, "r");
760 if (!file)
761 return;
762
763 while (!feof(file)) {
764 struct source_line *src;
765 size_t dummy = 0;
766 char *c;
767
768 src = malloc(sizeof(struct source_line));
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800769 assert(src != NULL);
Ingo Molnare0143ba2009-03-23 21:29:59 +0100770 memset(src, 0, sizeof(struct source_line));
771
772 if (getline(&src->line, &dummy, file) < 0)
773 break;
774 if (!src->line)
775 break;
776
777 c = strchr(src->line, '\n');
778 if (c)
779 *c = 0;
780
781 lines = g_list_prepend(lines, src);
782
783 if (strlen(src->line)>8 && src->line[8] == ':')
784 src->EIP = strtoull(src->line, NULL, 16);
785 if (strlen(src->line)>8 && src->line[16] == ':')
786 src->EIP = strtoull(src->line, NULL, 16);
787 }
788 pclose(file);
789 lines = g_list_reverse(lines);
790}
791
792static void record_precise_ip(uint64_t ip)
793{
794 struct source_line *line;
795 GList *item;
796
797 item = g_list_first(lines);
798 while (item) {
799 line = item->data;
800 if (line->EIP == ip)
801 line->count++;
802 if (line->EIP > ip)
803 break;
804 item = g_list_next(item);
805 }
806}
807
808static void lookup_sym_in_vmlinux(struct sym_entry *sym)
809{
810 struct source_line *line;
811 GList *item;
812 char pattern[PATH_MAX];
813 sprintf(pattern, "<%s>:", sym->sym);
814
815 item = g_list_first(lines);
816 while (item) {
817 line = item->data;
818 if (strstr(line->line, pattern)) {
819 sym->source = item;
820 break;
821 }
822 item = g_list_next(item);
823 }
824}
825
826void show_lines(GList *item_queue, int item_queue_count)
827{
828 int i;
829 struct source_line *line;
830
831 for (i = 0; i < item_queue_count; i++) {
832 line = item_queue->data;
833 printf("%8li\t%s\n", line->count, line->line);
834 item_queue = g_list_next(item_queue);
835 }
836}
837
838#define TRACE_COUNT 3
839
840static void show_details(struct sym_entry *sym)
841{
842 struct source_line *line;
843 GList *item;
844 int displayed = 0;
845 GList *item_queue = NULL;
846 int item_queue_count = 0;
847
848 if (!sym->source)
849 lookup_sym_in_vmlinux(sym);
850 if (!sym->source)
851 return;
852
853 printf("Showing details for %s\n", sym->sym);
854
855 item = sym->source;
856 while (item) {
857 line = item->data;
858 if (displayed && strstr(line->line, ">:"))
859 break;
860
861 if (!item_queue_count)
862 item_queue = item;
863 item_queue_count ++;
864
865 if (line->count >= count_filter) {
866 show_lines(item_queue, item_queue_count);
867 item_queue_count = 0;
868 item_queue = NULL;
869 } else if (item_queue_count > TRACE_COUNT) {
870 item_queue = g_list_next(item_queue);
871 item_queue_count --;
872 }
873
874 line->count = 0;
875 displayed++;
876 if (displayed > 300)
877 break;
878 item = g_list_next(item);
879 }
880}
881
882/*
883 * Binary search in the histogram table and record the hit:
884 */
885static void record_ip(uint64_t ip, int counter)
886{
887 int left_idx, middle_idx, right_idx, idx;
888 unsigned long left, middle, right;
889
890 record_precise_ip(ip);
891
892 left_idx = 0;
893 right_idx = sym_table_count-1;
894 assert(ip <= max_ip && ip >= min_ip);
895
896 while (left_idx + 1 < right_idx) {
897 middle_idx = (left_idx + right_idx) / 2;
898
899 left = sym_table[ left_idx].addr;
900 middle = sym_table[middle_idx].addr;
901 right = sym_table[ right_idx].addr;
902
903 if (!(left <= middle && middle <= right)) {
904 printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right);
905 printf("%d %d %d\n", left_idx, middle_idx, right_idx);
906 }
907 assert(left <= middle && middle <= right);
908 if (!(left <= ip && ip <= right)) {
909 printf(" left: %016lx\n", left);
910 printf(" ip: %016lx\n", ip);
911 printf("right: %016lx\n", right);
912 }
913 assert(left <= ip && ip <= right);
914 /*
915 * [ left .... target .... middle .... right ]
916 * => right := middle
917 */
918 if (ip < middle) {
919 right_idx = middle_idx;
920 continue;
921 }
922 /*
923 * [ left .... middle ... target ... right ]
924 * => left := middle
925 */
926 left_idx = middle_idx;
927 }
928
929 idx = left_idx;
930
931 if (!sym_table[idx].skip)
932 sym_table[idx].count[counter]++;
933 else events--;
934}
935
936static void process_event(uint64_t ip, int counter)
937{
938 events++;
939
940 if (ip < min_ip || ip > max_ip) {
941 userspace_events++;
942 return;
943 }
944
945 record_ip(ip, counter);
946}
947
Wu Fengguangf7524bd2009-03-20 10:08:06 +0800948static void process_options(int argc, char *argv[])
949{
950 int error = 0, counter;
951
952 if (strstr(argv[0], "perfstat"))
953 run_perfstat = 1;
954
955 for (;;) {
956 int option_index = 0;
957 /** Options for getopt */
958 static struct option long_options[] = {
959 {"count", required_argument, NULL, 'c'},
960 {"cpu", required_argument, NULL, 'C'},
961 {"delay", required_argument, NULL, 'd'},
962 {"dump_symtab", no_argument, NULL, 'D'},
963 {"event", required_argument, NULL, 'e'},
964 {"filter", required_argument, NULL, 'f'},
965 {"group", required_argument, NULL, 'g'},
966 {"help", no_argument, NULL, 'h'},
967 {"nmi", required_argument, NULL, 'n'},
968 {"pid", required_argument, NULL, 'p'},
969 {"vmlinux", required_argument, NULL, 'x'},
970 {"symbol", required_argument, NULL, 's'},
971 {"stat", no_argument, NULL, 'S'},
972 {"zero", no_argument, NULL, 'z'},
973 {NULL, 0, NULL, 0 }
974 };
975 int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:p:s:Sx:z",
976 long_options, &option_index);
977 if (c == -1)
978 break;
979
980 switch (c) {
981 case 'a': system_wide = 1; break;
982 case 'c': event_count[nr_counters] = atoi(optarg); break;
983 case 'C':
984 /* CPU and PID are mutually exclusive */
985 if (tid != -1) {
986 printf("WARNING: CPU switch overriding PID\n");
987 sleep(1);
988 tid = -1;
989 }
990 profile_cpu = atoi(optarg); break;
991 case 'd': delay_secs = atoi(optarg); break;
992 case 'D': dump_symtab = 1; break;
993
994 case 'e': error = parse_events(optarg); break;
995
996 case 'f': count_filter = atoi(optarg); break;
997 case 'g': group = atoi(optarg); break;
998 case 'h': display_help(); break;
999 case 'n': nmi = atoi(optarg); break;
1000 case 'p':
1001 /* CPU and PID are mutually exclusive */
1002 if (profile_cpu != -1) {
1003 printf("WARNING: PID switch overriding CPU\n");
1004 sleep(1);
1005 profile_cpu = -1;
1006 }
1007 tid = atoi(optarg); break;
1008 case 's': sym_filter = strdup(optarg); break;
1009 case 'S': run_perfstat = 1; break;
1010 case 'x': vmlinux = strdup(optarg); break;
1011 case 'z': zero = 1; break;
1012 default: error = 1; break;
1013 }
1014 }
1015 if (error)
1016 display_help();
1017
1018 if (!nr_counters) {
1019 if (run_perfstat)
1020 nr_counters = 8;
1021 else {
1022 nr_counters = 1;
1023 event_id[0] = 0;
1024 }
1025 }
1026
1027 for (counter = 0; counter < nr_counters; counter++) {
1028 if (event_count[counter])
1029 continue;
1030
1031 if (event_id[counter] < PERF_HW_EVENTS_MAX)
1032 event_count[counter] = default_count[event_id[counter]];
1033 else
1034 event_count[counter] = 100000;
1035 }
1036}
1037
Ingo Molnare0143ba2009-03-23 21:29:59 +01001038int main(int argc, char *argv[])
1039{
1040 struct pollfd event_array[MAX_NR_CPUS][MAX_COUNTERS];
1041 struct perf_counter_hw_event hw_event;
Ingo Molnare0143ba2009-03-23 21:29:59 +01001042 int i, counter, group_fd;
1043 unsigned int cpu;
1044 uint64_t ip;
1045 ssize_t res;
1046 int ret;
1047
1048 process_options(argc, argv);
1049
1050 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
Wu Fengguangf7524bd2009-03-20 10:08:06 +08001051 assert(nr_cpus <= MAX_NR_CPUS);
1052 assert(nr_cpus >= 0);
1053
1054 if (run_perfstat)
1055 return do_perfstat(argc, argv);
1056
Ingo Molnare0143ba2009-03-23 21:29:59 +01001057 if (tid != -1 || profile_cpu != -1)
1058 nr_cpus = 1;
1059
Ingo Molnare0143ba2009-03-23 21:29:59 +01001060 for (i = 0; i < nr_cpus; i++) {
1061 group_fd = -1;
1062 for (counter = 0; counter < nr_counters; counter++) {
1063
1064 cpu = profile_cpu;
1065 if (tid == -1 && profile_cpu == -1)
1066 cpu = i;
1067
1068 memset(&hw_event, 0, sizeof(hw_event));
1069 hw_event.type = event_id[counter];
1070 hw_event.raw = event_raw[counter];
1071 hw_event.irq_period = event_count[counter];
1072 hw_event.record_type = PERF_RECORD_IRQ;
1073 hw_event.nmi = nmi;
1074
1075 fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
1076 fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
1077 if (fd[i][counter] < 0) {
1078 printf("kerneltop error: syscall returned with %d (%s)\n",
1079 fd[i][counter], strerror(-fd[i][counter]));
1080 if (fd[i][counter] == -1)
1081 printf("Are you root?\n");
1082 exit(-1);
1083 }
1084 assert(fd[i][counter] >= 0);
1085
1086 /*
1087 * First counter acts as the group leader:
1088 */
1089 if (group && group_fd == -1)
1090 group_fd = fd[i][counter];
1091
1092 event_array[i][counter].fd = fd[i][counter];
1093 event_array[i][counter].events = POLLIN;
1094 }
1095 }
1096
1097 parse_symbols();
1098 if (vmlinux && sym_filter_entry)
1099 parse_vmlinux(vmlinux);
1100
1101 printf("KernelTop refresh period: %d seconds\n", delay_secs);
1102 last_refresh = time(NULL);
1103
1104 while (1) {
1105 int hits = events;
1106
1107 for (i = 0; i < nr_cpus; i++) {
1108 for (counter = 0; counter < nr_counters; counter++) {
1109 res = read(fd[i][counter], (char *) &ip, sizeof(ip));
1110 if (res > 0) {
1111 assert(res == sizeof(ip));
1112
1113 process_event(ip, counter);
1114 }
1115 }
1116 }
1117
1118 if (time(NULL) >= last_refresh + delay_secs) {
1119 print_sym_table();
1120 events = userspace_events = 0;
1121 }
1122
1123 if (hits == events)
1124 ret = poll(event_array[0], nr_cpus, 1000);
1125 hits = events;
1126 }
1127
1128 return 0;
1129}