blob: 6de38d2568830940e31d7f67adf161e2ca090373 [file] [log] [blame]
/*
 * kerneltop.c: show top kernel functions - performance counters showcase

   Build with:

     cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt

   Sample output:

------------------------------------------------------------------------------
 KernelTop:    2669 irqs/sec  [NMI, cache-misses/cache-refs],  (all, cpu: 2)
------------------------------------------------------------------------------

             weight         RIP          kernel function
             ______   ________________   _______________

              35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
              33.00 - ffffffff804cb740 : sock_alloc_send_skb
              31.26 - ffffffff804ce808 : skb_push
              22.43 - ffffffff80510004 : tcp_established_options
              19.00 - ffffffff8027d250 : find_get_page
              15.76 - ffffffff804e4fc9 : eth_type_trans
              15.20 - ffffffff804d8baa : dst_release
              14.86 - ffffffff804cf5d8 : skb_release_head_state
              14.00 - ffffffff802217d5 : read_hpet
              12.00 - ffffffff804ffb7f : __ip_local_out
              11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
               8.54 - ffffffff805001a3 : ip_queue_xmit
 */
30
/*
 * perfstat:  /usr/bin/time -alike performance counter statistics utility

          It summarizes the counter events of all tasks (and child tasks),
          covering all CPUs that the command (or workload) executes on.
          It only counts the per-task events of the workload started,
          independent of how many other tasks run on those CPUs.

   Sample output:

   $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null

   Performance counter stats for 'ls':

           163516953 instructions
                2295 cache-misses
             2855182 branch-misses
 */
49
/*
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
63
Ingo Molnar148be2c2009-04-27 08:02:14 +020064#include "util/util.h"
Ingo Molnarddcacfa2009-04-20 15:37:32 +020065
66#include <getopt.h>
67#include <assert.h>
68#include <fcntl.h>
69#include <stdio.h>
70#include <errno.h>
Ingo Molnarddcacfa2009-04-20 15:37:32 +020071#include <time.h>
72#include <sched.h>
73#include <pthread.h>
74
75#include <sys/syscall.h>
76#include <sys/ioctl.h>
77#include <sys/poll.h>
78#include <sys/prctl.h>
79#include <sys/wait.h>
80#include <sys/uio.h>
81#include <sys/mman.h>
82
83#include <linux/unistd.h>
84#include <linux/types.h>
85
86#include "../../include/linux/perf_counter.h"
87
Thomas Gleixner6eda5832009-05-01 18:29:57 +020088#include "perf.h"
Ingo Molnarddcacfa2009-04-20 15:37:32 +020089
90static int system_wide = 0;
91
92static int nr_counters = 0;
93static __u64 event_id[MAX_COUNTERS] = {
94 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
95 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
96 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
97 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
98
99 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
100 EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
101 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
102 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
103};
104static int default_interval = 100000;
105static int event_count[MAX_COUNTERS];
106static int fd[MAX_NR_CPUS][MAX_COUNTERS];
107
108static int tid = -1;
109static int profile_cpu = -1;
110static int nr_cpus = 0;
111static int nmi = 1;
112static int group = 0;
113static unsigned int page_size;
114
115static int zero;
116
Ingo Molnar66cf7822009-04-30 13:53:33 +0200117static int scale = 1;
Ingo Molnarddcacfa2009-04-20 15:37:32 +0200118
/*
 * Table of six default counts.
 * NOTE(review): nothing in the visible part of this file reads this table;
 * presumably these are per-event default sample periods - confirm before
 * relying on the ordering.
 */
static const unsigned int default_count[] = {
	[0] = 1000000,
	[1] = 1000000,
	[2] =   10000,
	[3] =   10000,
	[4] = 1000000,
	[5] =   10000,
};
127
/*
 * Human-readable names of the hardware events.  event_name() indexes this
 * table directly with the event id extracted from a counter config, so the
 * order of the entries is significant.
 */
static char *hw_event_names[] = {
	[0] = "CPU cycles",
	[1] = "instructions",
	[2] = "cache references",
	[3] = "cache misses",
	[4] = "branches",
	[5] = "branch misses",
	[6] = "bus cycles",
};
137
/*
 * Human-readable names of the software events.  event_name() indexes this
 * table directly with the event id extracted from a counter config, so the
 * order of the entries is significant.
 */
static char *sw_event_names[] = {
	[0] = "cpu clock ticks",
	[1] = "task clock ticks",
	[2] = "pagefaults",
	[3] = "context switches",
	[4] = "CPU migrations",
	[5] = "minor faults",
	[6] = "major faults",
};
147
148struct event_symbol {
149 __u64 event;
150 char *symbol;
151};
152
153static struct event_symbol event_symbols[] = {
154 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
155 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
156 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
157 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
158 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
159 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
160 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
161 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
162 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },
163
164 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
165 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
166 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
167 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
168 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
169 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
170 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
171 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
172 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
173 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
174};
175
/*
 * Extract one bit-field from a counter config word: mask the value with
 * PERF_COUNTER_<name>_MASK, then shift it down by PERF_COUNTER_<name>_SHIFT.
 *
 * The argument is parenthesised so that a compound expression such as
 * PERF_COUNTER_TYPE(a | b) is masked as a whole; without the parentheses
 * '&' would bind only to the last operand.
 */
#define __PERF_COUNTER_FIELD(config, name) \
	(((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

/* Accessors for the individual fields of a counter config word. */
#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
183
184static void display_events_help(void)
185{
186 unsigned int i;
187 __u64 e;
188
189 printf(
190 " -e EVENT --event=EVENT # symbolic-name abbreviations");
191
192 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
193 int type, id;
194
195 e = event_symbols[i].event;
196 type = PERF_COUNTER_TYPE(e);
197 id = PERF_COUNTER_ID(e);
198
199 printf("\n %d:%d: %-20s",
200 type, id, event_symbols[i].symbol);
201 }
202
203 printf("\n"
204 " rNNN: raw PMU events (eventsel+umask)\n\n");
205}
206
207static void display_help(void)
208{
209 printf(
210 "Usage: perfstat [<events...>] <cmd...>\n\n"
211 "PerfStat Options (up to %d event types can be specified):\n\n",
212 MAX_COUNTERS);
213
214 display_events_help();
215
216 printf(
217 " -l # scale counter values\n"
218 " -a # system-wide collection\n");
219 exit(0);
220}
221
222static char *event_name(int ctr)
223{
224 __u64 config = event_id[ctr];
225 int type = PERF_COUNTER_TYPE(config);
226 int id = PERF_COUNTER_ID(config);
227 static char buf[32];
228
229 if (PERF_COUNTER_RAW(config)) {
230 sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
231 return buf;
232 }
233
234 switch (type) {
235 case PERF_TYPE_HARDWARE:
236 if (id < PERF_HW_EVENTS_MAX)
237 return hw_event_names[id];
238 return "unknown-hardware";
239
240 case PERF_TYPE_SOFTWARE:
241 if (id < PERF_SW_EVENTS_MAX)
242 return sw_event_names[id];
243 return "unknown-software";
244
245 default:
246 break;
247 }
248
249 return "unknown";
250}
251
252/*
253 * Each event can have multiple symbolic names.
254 * Symbolic names are (almost) exactly matched.
255 */
256static __u64 match_event_symbols(char *str)
257{
258 __u64 config, id;
259 int type;
260 unsigned int i;
261
262 if (sscanf(str, "r%llx", &config) == 1)
263 return config | PERF_COUNTER_RAW_MASK;
264
265 if (sscanf(str, "%d:%llu", &type, &id) == 2)
266 return EID(type, id);
267
268 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
269 if (!strncmp(str, event_symbols[i].symbol,
270 strlen(event_symbols[i].symbol)))
271 return event_symbols[i].event;
272 }
273
274 return ~0ULL;
275}
276
277static int parse_events(char *str)
278{
279 __u64 config;
280
281again:
282 if (nr_counters == MAX_COUNTERS)
283 return -1;
284
285 config = match_event_symbols(str);
286 if (config == ~0ULL)
287 return -1;
288
289 event_id[nr_counters] = config;
290 nr_counters++;
291
292 str = strstr(str, ",");
293 if (str) {
294 str++;
295 goto again;
296 }
297
298 return 0;
299}
300
301
/*
 * perfstat
 */

/*
 * One-million-byte array with external linkage.
 * NOTE(review): nothing in the visible part of this file references it;
 * check for users in other translation units before removing it.
 */
char fault_here[1000000];
307
308static void create_perfstat_counter(int counter)
309{
310 struct perf_counter_hw_event hw_event;
311
312 memset(&hw_event, 0, sizeof(hw_event));
313 hw_event.config = event_id[counter];
314 hw_event.record_type = 0;
315 hw_event.nmi = 0;
316 if (scale)
317 hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
318 PERF_FORMAT_TOTAL_TIME_RUNNING;
319
320 if (system_wide) {
321 int cpu;
322 for (cpu = 0; cpu < nr_cpus; cpu ++) {
323 fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
324 if (fd[cpu][counter] < 0) {
325 printf("perfstat error: syscall returned with %d (%s)\n",
326 fd[cpu][counter], strerror(errno));
327 exit(-1);
328 }
329 }
330 } else {
331 hw_event.inherit = 1;
332 hw_event.disabled = 1;
333
334 fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
335 if (fd[0][counter] < 0) {
336 printf("perfstat error: syscall returned with %d (%s)\n",
337 fd[0][counter], strerror(errno));
338 exit(-1);
339 }
340 }
341}
342
343int do_perfstat(int argc, char *argv[])
344{
345 unsigned long long t0, t1;
346 int counter;
347 ssize_t res;
348 int status;
349 int pid;
350
351 if (!system_wide)
352 nr_cpus = 1;
353
354 for (counter = 0; counter < nr_counters; counter++)
355 create_perfstat_counter(counter);
356
357 argc -= optind;
358 argv += optind;
359
360 if (!argc)
361 display_help();
362
363 /*
364 * Enable counters and exec the command:
365 */
366 t0 = rdclock();
367 prctl(PR_TASK_PERF_COUNTERS_ENABLE);
368
369 if ((pid = fork()) < 0)
370 perror("failed to fork");
371 if (!pid) {
372 if (execvp(argv[0], argv)) {
373 perror(argv[0]);
374 exit(-1);
375 }
376 }
377 while (wait(&status) >= 0)
378 ;
379 prctl(PR_TASK_PERF_COUNTERS_DISABLE);
380 t1 = rdclock();
381
382 fflush(stdout);
383
384 fprintf(stderr, "\n");
385 fprintf(stderr, " Performance counter stats for \'%s\':\n",
386 argv[0]);
387 fprintf(stderr, "\n");
388
389 for (counter = 0; counter < nr_counters; counter++) {
390 int cpu, nv;
391 __u64 count[3], single_count[3];
392 int scaled;
393
394 count[0] = count[1] = count[2] = 0;
395 nv = scale ? 3 : 1;
396 for (cpu = 0; cpu < nr_cpus; cpu ++) {
397 res = read(fd[cpu][counter],
398 single_count, nv * sizeof(__u64));
399 assert(res == nv * sizeof(__u64));
400
401 count[0] += single_count[0];
402 if (scale) {
403 count[1] += single_count[1];
404 count[2] += single_count[2];
405 }
406 }
407
408 scaled = 0;
409 if (scale) {
410 if (count[2] == 0) {
411 fprintf(stderr, " %14s %-20s\n",
412 "<not counted>", event_name(counter));
413 continue;
414 }
415 if (count[2] < count[1]) {
416 scaled = 1;
417 count[0] = (unsigned long long)
418 ((double)count[0] * count[1] / count[2] + 0.5);
419 }
420 }
421
422 if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
423 event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
424
425 double msecs = (double)count[0] / 1000000;
426
427 fprintf(stderr, " %14.6f %-20s (msecs)",
428 msecs, event_name(counter));
429 } else {
430 fprintf(stderr, " %14Ld %-20s (events)",
431 count[0], event_name(counter));
432 }
433 if (scaled)
434 fprintf(stderr, " (scaled from %.2f%%)",
435 (double) count[2] / count[1] * 100);
436 fprintf(stderr, "\n");
437 }
438 fprintf(stderr, "\n");
439 fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
440 (double)(t1-t0)/1e6);
441 fprintf(stderr, "\n");
442
443 return 0;
444}
445
446static void process_options(int argc, char **argv)
447{
448 int error = 0, counter;
449
450 for (;;) {
451 int option_index = 0;
452 /** Options for getopt */
453 static struct option long_options[] = {
454 {"count", required_argument, NULL, 'c'},
455 {"cpu", required_argument, NULL, 'C'},
456 {"delay", required_argument, NULL, 'd'},
457 {"dump_symtab", no_argument, NULL, 'D'},
458 {"event", required_argument, NULL, 'e'},
459 {"filter", required_argument, NULL, 'f'},
460 {"group", required_argument, NULL, 'g'},
461 {"help", no_argument, NULL, 'h'},
462 {"nmi", required_argument, NULL, 'n'},
463 {"munmap_info", no_argument, NULL, 'U'},
464 {"pid", required_argument, NULL, 'p'},
465 {"realtime", required_argument, NULL, 'r'},
466 {"scale", no_argument, NULL, 'l'},
467 {"symbol", required_argument, NULL, 's'},
468 {"stat", no_argument, NULL, 'S'},
469 {"vmlinux", required_argument, NULL, 'x'},
470 {"zero", no_argument, NULL, 'z'},
471 {NULL, 0, NULL, 0 }
472 };
473 int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
474 long_options, &option_index);
475 if (c == -1)
476 break;
477
478 switch (c) {
479 case 'a': system_wide = 1; break;
480 case 'c': default_interval = atoi(optarg); break;
481 case 'C':
482 /* CPU and PID are mutually exclusive */
483 if (tid != -1) {
484 printf("WARNING: CPU switch overriding PID\n");
485 sleep(1);
486 tid = -1;
487 }
488 profile_cpu = atoi(optarg); break;
489
490 case 'e': error = parse_events(optarg); break;
491
492 case 'g': group = atoi(optarg); break;
493 case 'h': display_help(); break;
494 case 'l': scale = 1; break;
495 case 'n': nmi = atoi(optarg); break;
496 case 'p':
497 /* CPU and PID are mutually exclusive */
498 if (profile_cpu != -1) {
499 printf("WARNING: PID switch overriding CPU\n");
500 sleep(1);
501 profile_cpu = -1;
502 }
503 tid = atoi(optarg); break;
504 case 'z': zero = 1; break;
505 default: error = 1; break;
506 }
507 }
508 if (error)
509 display_help();
510
511 if (!nr_counters) {
512 nr_counters = 8;
513 }
514
515 for (counter = 0; counter < nr_counters; counter++) {
516 if (event_count[counter])
517 continue;
518
519 event_count[counter] = default_interval;
520 }
521}
522
523int cmd_stat(int argc, char **argv, const char *prefix)
524{
525 page_size = sysconf(_SC_PAGE_SIZE);
526
527 process_options(argc, argv);
528
529 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
530 assert(nr_cpus <= MAX_NR_CPUS);
531 assert(nr_cpus >= 0);
532
533 return do_perfstat(argc, argv);
534}