blob: 443f415441daefe4a807afba68450ad922c91fbc [file] [log] [blame]
/*
 * Debug Store support
 *
 * This provides a low-level interface to the hardware's Debug Store
 * feature that is used for branch trace store (BTS) and
 * precise-event based sampling (PEBS).
 *
 * It manages:
 * - DS and BTS hardware configuration
 * - buffer overflow handling (to be done)
 * - buffer access
 *
 * It does not do:
 * - security checking (is the caller allowed to trace the task)
 * - buffer allocation (memory accounting)
 *
 *
 * Copyright (C) 2007-2009 Intel Corporation.
 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
 */
21
Ingo Molnare9a22d12009-03-13 11:54:40 +010022#include <linux/kernel.h>
23#include <linux/string.h>
24#include <linux/errno.h>
25#include <linux/sched.h>
26#include <linux/slab.h>
27#include <linux/mm.h>
Markus Metzger15879d02009-04-03 16:43:38 +020028#include <linux/trace_clock.h>
Markus Metzger93fa7632008-04-08 11:01:58 +020029
Markus Metzgereee3af42008-01-30 13:31:09 +010030#include <asm/ds.h>
31
Markus Metzger8a327f62009-03-13 10:45:07 +010032#include "ds_selftest.h"
Markus Metzger93fa7632008-04-08 11:01:58 +020033
34/*
Ingo Molnare9a22d12009-03-13 11:54:40 +010035 * The configuration for a particular DS hardware implementation:
Markus Metzger93fa7632008-04-08 11:01:58 +020036 */
37struct ds_configuration {
Ingo Molnare9a22d12009-03-13 11:54:40 +010038 /* The name of the configuration: */
39 const char *name;
40
41 /* The size of pointer-typed fields in DS, BTS, and PEBS: */
42 unsigned char sizeof_ptr_field;
43
44 /* The size of a BTS/PEBS record in bytes: */
45 unsigned char sizeof_rec[2];
46
47 /* Control bit-masks indexed by enum ds_feature: */
48 unsigned long ctl[dsf_ctl_max];
Markus Metzger93fa7632008-04-08 11:01:58 +020049};
Markus Metzgeree811512009-04-03 16:43:47 +020050static struct ds_configuration ds_cfg __read_mostly;
Markus Metzgerc2724772008-12-11 13:49:59 +010051
Markus Metzgerc2724772008-12-11 13:49:59 +010052
/* Upper bound on the size of a DS configuration, in bytes. */
#define MAX_SIZEOF_DS		(12 * 8)

/* Upper bound on the size of a single BTS record, in bytes. */
#define MAX_SIZEOF_BTS		(3 * 8)

/* Alignment applied to BTS and PEBS buffers. */
#define DS_ALIGNMENT		(1 << 3)

/* All BTS-related control bits in the DS MSR register, or'ed together. */
#define BTS_CONTROL				\
	(ds_cfg.ctl[dsf_bts]		|	\
	 ds_cfg.ctl[dsf_bts_kernel]	|	\
	 ds_cfg.ctl[dsf_bts_user]	|	\
	 ds_cfg.ctl[dsf_bts_overflow])
Markus Metzgereee3af42008-01-30 13:31:09 +010068
/*
 * The part shared by BTS and PEBS tracers.
 *
 * It stores the tracer's configuration; the tracer structure also
 * serves as the handle by which a tracer is identified.
 */
struct ds_tracer {
	/* DS context that is (partially) owned by this tracer. */
	struct ds_context	*context;

	/* Buffer that was handed to ds_request(), and its size in bytes. */
	void			*buffer;
	size_t			size;
};
82
83struct bts_tracer {
Ingo Molnare9a22d12009-03-13 11:54:40 +010084 /* The common DS part: */
85 struct ds_tracer ds;
86
87 /* The trace including the DS configuration: */
88 struct bts_trace trace;
89
90 /* Buffer overflow notification function: */
91 bts_ovfl_callback_t ovfl;
Markus Metzgercac94f92009-04-03 16:43:33 +020092
93 /* Active flags affecting trace collection. */
94 unsigned int flags;
Markus Metzgerca0002a2008-11-25 09:01:25 +010095};
96
97struct pebs_tracer {
Ingo Molnare9a22d12009-03-13 11:54:40 +010098 /* The common DS part: */
99 struct ds_tracer ds;
100
101 /* The trace including the DS configuration: */
102 struct pebs_trace trace;
103
104 /* Buffer overflow notification function: */
105 pebs_ovfl_callback_t ovfl;
Markus Metzgerca0002a2008-11-25 09:01:25 +0100106};
Markus Metzgereee3af42008-01-30 13:31:09 +0100107
/*
 * Debug Store (DS) save area configuration (see Intel64 and IA32
 * Architectures Software Developer's Manual, section 18.5)
 *
 * The DS configuration consists of the following fields; different
 * architectures vary in the size of those fields.
 *
 * - double-word aligned base linear address of the BTS buffer
 * - write pointer into the BTS buffer
 * - end linear address of the BTS buffer (one byte beyond the end of
 *   the buffer)
 * - interrupt pointer into BTS buffer
 *   (interrupt occurs when write pointer passes interrupt pointer)
 * - double-word aligned base linear address of the PEBS buffer
 * - write pointer into the PEBS buffer
 * - end linear address of the PEBS buffer (one byte beyond the end of
 *   the buffer)
 * - interrupt pointer into PEBS buffer
 *   (interrupt occurs when write pointer passes interrupt pointer)
 * - value to which counter is reset following counter overflow
 *
 * Later architectures use 64bit pointers throughout, whereas earlier
 * architectures use 32bit pointers in 32bit mode.
 *
 *
 * We compute the base address for the first 8 fields based on:
 * - the field size stored in the DS configuration
 * - the relative field position
 * - an offset giving the start of the respective region
 *
 * This offset is further used to index various arrays holding
 * information for BTS and PEBS at the respective index.
 *
 * On later 32bit processors, we only access the lower 32bit of the
 * 64bit pointer fields. The upper halves will be zeroed out.
 */
144
/* Position of a field within the BTS or the PEBS part of the DS area. */
enum ds_field {
	ds_buffer_base		= 0,
	ds_index		= 1,
	ds_absolute_maximum	= 2,
	ds_interrupt_threshold	= 3,
};
151
/* Selects the BTS or the PEBS part of the DS area. */
enum ds_qualifier {
	ds_bts	= 0,
	ds_pebs	= 1,
};
156
Ingo Molnare9a22d12009-03-13 11:54:40 +0100157static inline unsigned long
158ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
Markus Metzger93fa7632008-04-08 11:01:58 +0200159{
Markus Metzgerbc44fb52009-03-13 10:42:18 +0100160 base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
Markus Metzger93fa7632008-04-08 11:01:58 +0200161 return *(unsigned long *)base;
162}
163
Ingo Molnare9a22d12009-03-13 11:54:40 +0100164static inline void
165ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
166 unsigned long value)
Markus Metzger93fa7632008-04-08 11:01:58 +0200167{
Markus Metzgerbc44fb52009-03-13 10:42:18 +0100168 base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
Markus Metzger93fa7632008-04-08 11:01:58 +0200169 (*(unsigned long *)base) = value;
170}
171
Markus Metzgereee3af42008-01-30 13:31:09 +0100172
/*
 * This lock is taken only while BTS or PEBS resources are being
 * allocated or released.
 */
static DEFINE_SPINLOCK(ds_lock);
Markus Metzgereee3af42008-01-30 13:31:09 +0100177
Markus Metzger93fa7632008-04-08 11:01:58 +0200178/*
179 * We either support (system-wide) per-cpu or per-thread allocation.
180 * We distinguish the two based on the task_struct pointer, where a
181 * NULL pointer indicates per-cpu allocation for the current cpu.
182 *
183 * Allocations are use-counted. As soon as resources are allocated,
184 * further allocations must be of the same type (per-cpu or
185 * per-thread). We model this by counting allocations (i.e. the number
186 * of tracers of a certain type) for one type negatively:
187 * =0 no tracers
188 * >0 number of per-thread tracers
189 * <0 number of per-cpu tracers
190 *
Markus Metzger93fa7632008-04-08 11:01:58 +0200191 * Tracers essentially gives the number of ds contexts for a certain
192 * type of allocation.
193 */
Markus Metzgerc2724772008-12-11 13:49:59 +0100194static atomic_t tracers = ATOMIC_INIT(0);
Markus Metzger93fa7632008-04-08 11:01:58 +0200195
Markus Metzger38f80112009-04-03 16:43:37 +0200196static inline int get_tracer(struct task_struct *task)
Markus Metzgera95d67f2008-01-30 13:31:20 +0100197{
Markus Metzger38f80112009-04-03 16:43:37 +0200198 int error;
199
200 spin_lock_irq(&ds_lock);
201
202 if (task) {
203 error = -EPERM;
204 if (atomic_read(&tracers) < 0)
205 goto out;
Markus Metzgerc2724772008-12-11 13:49:59 +0100206 atomic_inc(&tracers);
Markus Metzger38f80112009-04-03 16:43:37 +0200207 } else {
208 error = -EPERM;
209 if (atomic_read(&tracers) > 0)
210 goto out;
Markus Metzgerc2724772008-12-11 13:49:59 +0100211 atomic_dec(&tracers);
Markus Metzger38f80112009-04-03 16:43:37 +0200212 }
213
214 error = 0;
215out:
216 spin_unlock_irq(&ds_lock);
217 return error;
Markus Metzgereee3af42008-01-30 13:31:09 +0100218}
219
Markus Metzger93fa7632008-04-08 11:01:58 +0200220static inline void put_tracer(struct task_struct *task)
Markus Metzgereee3af42008-01-30 13:31:09 +0100221{
Markus Metzgerc2724772008-12-11 13:49:59 +0100222 if (task)
223 atomic_dec(&tracers);
224 else
225 atomic_inc(&tracers);
Markus Metzgereee3af42008-01-30 13:31:09 +0100226}
227
Markus Metzger93fa7632008-04-08 11:01:58 +0200228/*
229 * The DS context is either attached to a thread or to a cpu:
230 * - in the former case, the thread_struct contains a pointer to the
231 * attached context.
232 * - in the latter case, we use a static array of per-cpu context
233 * pointers.
234 *
235 * Contexts are use-counted. They are allocated on first access and
236 * deallocated when the last user puts the context.
Markus Metzger93fa7632008-04-08 11:01:58 +0200237 */
Markus Metzgerc2724772008-12-11 13:49:59 +0100238struct ds_context {
Ingo Molnare9a22d12009-03-13 11:54:40 +0100239 /* The DS configuration; goes into MSR_IA32_DS_AREA: */
240 unsigned char ds[MAX_SIZEOF_DS];
241
242 /* The owner of the BTS and PEBS configuration, respectively: */
243 struct bts_tracer *bts_master;
244 struct pebs_tracer *pebs_master;
245
246 /* Use count: */
Markus Metzgerde79f542009-04-03 16:43:40 +0200247 unsigned long count;
Ingo Molnare9a22d12009-03-13 11:54:40 +0100248
249 /* Pointer to the context pointer field: */
250 struct ds_context **this;
251
Markus Metzgerde79f542009-04-03 16:43:40 +0200252 /* The traced task; NULL for cpu tracing: */
Ingo Molnare9a22d12009-03-13 11:54:40 +0100253 struct task_struct *task;
Markus Metzgerde79f542009-04-03 16:43:40 +0200254
255 /* The traced cpu; only valid if task is NULL: */
256 int cpu;
Markus Metzgerc2724772008-12-11 13:49:59 +0100257};
Markus Metzger93fa7632008-04-08 11:01:58 +0200258
Markus Metzgerde79f542009-04-03 16:43:40 +0200259static DEFINE_PER_CPU(struct ds_context *, cpu_context);
Markus Metzger93fa7632008-04-08 11:01:58 +0200260
Markus Metzgercc1dc6d2008-12-16 15:51:03 +0100261
Markus Metzgerde79f542009-04-03 16:43:40 +0200262static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
Markus Metzger93fa7632008-04-08 11:01:58 +0200263{
Markus Metzger93fa7632008-04-08 11:01:58 +0200264 struct ds_context **p_context =
Markus Metzgerde79f542009-04-03 16:43:40 +0200265 (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu));
Markus Metzgercc1dc6d2008-12-16 15:51:03 +0100266 struct ds_context *context = NULL;
267 struct ds_context *new_context = NULL;
Markus Metzger93fa7632008-04-08 11:01:58 +0200268
Markus Metzgerde79f542009-04-03 16:43:40 +0200269 /* Chances are small that we already have a context. */
270 new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
Markus Metzgercc1dc6d2008-12-16 15:51:03 +0100271 if (!new_context)
272 return NULL;
273
Markus Metzgerde79f542009-04-03 16:43:40 +0200274 spin_lock_irq(&ds_lock);
Markus Metzgercc1dc6d2008-12-16 15:51:03 +0100275
276 context = *p_context;
Markus Metzgerde79f542009-04-03 16:43:40 +0200277 if (likely(!context)) {
Markus Metzgercc1dc6d2008-12-16 15:51:03 +0100278 context = new_context;
Markus Metzger93fa7632008-04-08 11:01:58 +0200279
Markus Metzgercc1dc6d2008-12-16 15:51:03 +0100280 context->this = p_context;
281 context->task = task;
Markus Metzgerde79f542009-04-03 16:43:40 +0200282 context->cpu = cpu;
Markus Metzgercc1dc6d2008-12-16 15:51:03 +0100283 context->count = 0;
Markus Metzgerde90add2008-11-25 08:52:56 +0100284
Markus Metzgercc1dc6d2008-12-16 15:51:03 +0100285 *p_context = context;
Markus Metzgerc2724772008-12-11 13:49:59 +0100286 }
Markus Metzger93fa7632008-04-08 11:01:58 +0200287
Markus Metzgercc1dc6d2008-12-16 15:51:03 +0100288 context->count++;
289
Markus Metzgerde79f542009-04-03 16:43:40 +0200290 spin_unlock_irq(&ds_lock);
Markus Metzgercc1dc6d2008-12-16 15:51:03 +0100291
292 if (context != new_context)
293 kfree(new_context);
294
Markus Metzger93fa7632008-04-08 11:01:58 +0200295 return context;
296}
297
Markus Metzgerde79f542009-04-03 16:43:40 +0200298static void ds_put_context(struct ds_context *context)
Markus Metzger93fa7632008-04-08 11:01:58 +0200299{
Markus Metzger8d99b3a2009-04-03 16:43:36 +0200300 struct task_struct *task;
Markus Metzgerde90add2008-11-25 08:52:56 +0100301 unsigned long irq;
302
Markus Metzger93fa7632008-04-08 11:01:58 +0200303 if (!context)
304 return;
305
Markus Metzgerde90add2008-11-25 08:52:56 +0100306 spin_lock_irqsave(&ds_lock, irq);
Markus Metzger93fa7632008-04-08 11:01:58 +0200307
Markus Metzgerc2724772008-12-11 13:49:59 +0100308 if (--context->count) {
309 spin_unlock_irqrestore(&ds_lock, irq);
310 return;
311 }
Markus Metzger93fa7632008-04-08 11:01:58 +0200312
Cyrill Gorcunov573da422008-04-28 23:15:04 +0400313 *(context->this) = NULL;
Markus Metzger93fa7632008-04-08 11:01:58 +0200314
Markus Metzger8d99b3a2009-04-03 16:43:36 +0200315 task = context->task;
Markus Metzger93fa7632008-04-08 11:01:58 +0200316
Markus Metzger8d99b3a2009-04-03 16:43:36 +0200317 if (task)
318 clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);
319
Markus Metzgerde79f542009-04-03 16:43:40 +0200320 /*
321 * We leave the (now dangling) pointer to the DS configuration in
322 * the DS_AREA msr. This is as good or as bad as replacing it with
323 * NULL - the hardware would crash if we enabled tracing.
324 *
325 * This saves us some problems with having to write an msr on a
326 * different cpu while preventing others from doing the same for the
327 * next context for that same cpu.
328 */
Markus Metzger93fa7632008-04-08 11:01:58 +0200329
Markus Metzgerde90add2008-11-25 08:52:56 +0100330 spin_unlock_irqrestore(&ds_lock, irq);
Markus Metzgerc2724772008-12-11 13:49:59 +0100331
Markus Metzger8d99b3a2009-04-03 16:43:36 +0200332 /* The context might still be in use for context switching. */
333 if (task && (task != current))
334 wait_task_context_switch(task);
335
Markus Metzgerc2724772008-12-11 13:49:59 +0100336 kfree(context);
Markus Metzger93fa7632008-04-08 11:01:58 +0200337}
338
Markus Metzgerde79f542009-04-03 16:43:40 +0200339static void ds_install_ds_area(struct ds_context *context)
340{
341 unsigned long ds;
342
343 ds = (unsigned long)context->ds;
344
345 /*
346 * There is a race between the bts master and the pebs master.
347 *
348 * The thread/cpu access is synchronized via get/put_cpu() for
349 * task tracing and via wrmsr_on_cpu for cpu tracing.
350 *
351 * If bts and pebs are collected for the same task or same cpu,
352 * the same confiuration is written twice.
353 */
354 if (context->task) {
355 get_cpu();
356 if (context->task == current)
357 wrmsrl(MSR_IA32_DS_AREA, ds);
358 set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
359 put_cpu();
360 } else
361 wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
362 (u32)((u64)ds), (u32)((u64)ds >> 32));
363}
Markus Metzger93fa7632008-04-08 11:01:58 +0200364
365/*
Markus Metzgerc2724772008-12-11 13:49:59 +0100366 * Call the tracer's callback on a buffer overflow.
Markus Metzger93fa7632008-04-08 11:01:58 +0200367 *
Markus Metzger93fa7632008-04-08 11:01:58 +0200368 * context: the ds context
369 * qual: the buffer type
370 */
Markus Metzgerca0002a2008-11-25 09:01:25 +0100371static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
Markus Metzger93fa7632008-04-08 11:01:58 +0200372{
Markus Metzgerca0002a2008-11-25 09:01:25 +0100373 switch (qual) {
Markus Metzgerc2724772008-12-11 13:49:59 +0100374 case ds_bts:
375 if (context->bts_master &&
376 context->bts_master->ovfl)
377 context->bts_master->ovfl(context->bts_master);
Markus Metzgerca0002a2008-11-25 09:01:25 +0100378 break;
Markus Metzgerc2724772008-12-11 13:49:59 +0100379 case ds_pebs:
380 if (context->pebs_master &&
381 context->pebs_master->ovfl)
382 context->pebs_master->ovfl(context->pebs_master);
Markus Metzgerca0002a2008-11-25 09:01:25 +0100383 break;
384 }
Markus Metzger93fa7632008-04-08 11:01:58 +0200385}
386
387
Markus Metzgerc2724772008-12-11 13:49:59 +0100388/*
389 * Write raw data into the BTS or PEBS buffer.
390 *
391 * The remainder of any partially written record is zeroed out.
392 *
393 * context: the DS context
Ingo Molnare9a22d12009-03-13 11:54:40 +0100394 * qual: the buffer type
395 * record: the data to write
396 * size: the size of the data
Markus Metzgerc2724772008-12-11 13:49:59 +0100397 */
Markus Metzgerca0002a2008-11-25 09:01:25 +0100398static int ds_write(struct ds_context *context, enum ds_qualifier qual,
399 const void *record, size_t size)
Markus Metzger93fa7632008-04-08 11:01:58 +0200400{
Markus Metzgerca0002a2008-11-25 09:01:25 +0100401 int bytes_written = 0;
Markus Metzger93fa7632008-04-08 11:01:58 +0200402
403 if (!record)
404 return -EINVAL;
405
Markus Metzger93fa7632008-04-08 11:01:58 +0200406 while (size) {
407 unsigned long base, index, end, write_end, int_th;
408 unsigned long write_size, adj_write_size;
Markus Metzgereee3af42008-01-30 13:31:09 +0100409
Markus Metzger93fa7632008-04-08 11:01:58 +0200410 /*
Markus Metzgerb8e47192009-03-13 10:46:42 +0100411 * Write as much as possible without producing an
Markus Metzger93fa7632008-04-08 11:01:58 +0200412 * overflow interrupt.
413 *
Markus Metzgerb8e47192009-03-13 10:46:42 +0100414 * Interrupt_threshold must either be
Markus Metzger93fa7632008-04-08 11:01:58 +0200415 * - bigger than absolute_maximum or
416 * - point to a record between buffer_base and absolute_maximum
417 *
Markus Metzgerb8e47192009-03-13 10:46:42 +0100418 * Index points to a valid record.
Markus Metzger93fa7632008-04-08 11:01:58 +0200419 */
420 base = ds_get(context->ds, qual, ds_buffer_base);
421 index = ds_get(context->ds, qual, ds_index);
422 end = ds_get(context->ds, qual, ds_absolute_maximum);
423 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
424
425 write_end = min(end, int_th);
426
Markus Metzgerb8e47192009-03-13 10:46:42 +0100427 /*
428 * If we are already beyond the interrupt threshold,
429 * we fill the entire buffer.
430 */
Markus Metzger93fa7632008-04-08 11:01:58 +0200431 if (write_end <= index)
432 write_end = end;
433
434 if (write_end <= index)
Markus Metzgerca0002a2008-11-25 09:01:25 +0100435 break;
Markus Metzger93fa7632008-04-08 11:01:58 +0200436
437 write_size = min((unsigned long) size, write_end - index);
438 memcpy((void *)index, record, write_size);
439
440 record = (const char *)record + write_size;
Markus Metzgerca0002a2008-11-25 09:01:25 +0100441 size -= write_size;
442 bytes_written += write_size;
Markus Metzger93fa7632008-04-08 11:01:58 +0200443
444 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
445 adj_write_size *= ds_cfg.sizeof_rec[qual];
446
Markus Metzgerb8e47192009-03-13 10:46:42 +0100447 /* Zero out trailing bytes. */
Markus Metzger93fa7632008-04-08 11:01:58 +0200448 memset((char *)index + write_size, 0,
449 adj_write_size - write_size);
450 index += adj_write_size;
451
452 if (index >= end)
453 index = base;
454 ds_set(context->ds, qual, ds_index, index);
455
456 if (index >= int_th)
Markus Metzgerca0002a2008-11-25 09:01:25 +0100457 ds_overflow(context, qual);
Markus Metzger93fa7632008-04-08 11:01:58 +0200458 }
459
Markus Metzgerca0002a2008-11-25 09:01:25 +0100460 return bytes_written;
Markus Metzgereee3af42008-01-30 13:31:09 +0100461}
462
Markus Metzgerc2724772008-12-11 13:49:59 +0100463
/*
 * Branch Trace Store (BTS) uses the following format. Different
 * architectures vary in the size of those fields.
 * - source linear address
 * - destination linear address
 * - flags
 *
 * Later architectures use 64bit pointers throughout, whereas earlier
 * architectures use 32bit pointers in 32bit mode.
 *
 * We compute the base address for the fields based on:
 * - the field size stored in the DS configuration
 * - the relative field position
 *
 * In order to store additional information in the BTS buffer, we use
 * a special source address to indicate that the record requires
 * special interpretation.
 *
 * Netburst indicated via a bit in the flags field whether the branch
 * was predicted; this is ignored.
 *
 * We use two levels of abstraction:
 * - the raw data level defined here
 * - an arch-independent level defined in ds.h
 */
489
490enum bts_field {
491 bts_from,
492 bts_to,
493 bts_flags,
494
Ingo Molnare9a22d12009-03-13 11:54:40 +0100495 bts_qual = bts_from,
Markus Metzger15879d02009-04-03 16:43:38 +0200496 bts_clock = bts_to,
Ingo Molnare9a22d12009-03-13 11:54:40 +0100497 bts_pid = bts_flags,
Markus Metzgerc2724772008-12-11 13:49:59 +0100498
Ingo Molnare9a22d12009-03-13 11:54:40 +0100499 bts_qual_mask = (bts_qual_max - 1),
500 bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
Markus Metzgerc2724772008-12-11 13:49:59 +0100501};
502
503static inline unsigned long bts_get(const char *base, enum bts_field field)
504{
Markus Metzgerbc44fb52009-03-13 10:42:18 +0100505 base += (ds_cfg.sizeof_ptr_field * field);
Markus Metzgerc2724772008-12-11 13:49:59 +0100506 return *(unsigned long *)base;
507}
508
509static inline void bts_set(char *base, enum bts_field field, unsigned long val)
510{
Markus Metzgerbc44fb52009-03-13 10:42:18 +0100511 base += (ds_cfg.sizeof_ptr_field * field);;
Markus Metzgerc2724772008-12-11 13:49:59 +0100512 (*(unsigned long *)base) = val;
513}
514
515
/*
 * The raw BTS data is architecture dependent.
 *
 * For higher-level users, we give an arch-independent view.
 * - ds.h defines struct bts_struct
 * - bts_read translates one raw bts record into a bts_struct
 * - bts_write translates one bts_struct into the raw format and
 *   writes it at the top of the given tracer's buffer.
 *
 * return: bytes read/written on success; -Eerrno, otherwise
 */
Ingo Molnare9a22d12009-03-13 11:54:40 +0100527static int
528bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
Markus Metzgereee3af42008-01-30 13:31:09 +0100529{
Markus Metzgerca0002a2008-11-25 09:01:25 +0100530 if (!tracer)
531 return -EINVAL;
532
Markus Metzgerc2724772008-12-11 13:49:59 +0100533 if (at < tracer->trace.ds.begin)
534 return -EINVAL;
535
536 if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
537 return -EINVAL;
538
539 memset(out, 0, sizeof(*out));
540 if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
541 out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
Markus Metzger15879d02009-04-03 16:43:38 +0200542 out->variant.event.clock = bts_get(at, bts_clock);
543 out->variant.event.pid = bts_get(at, bts_pid);
Markus Metzgerc2724772008-12-11 13:49:59 +0100544 } else {
545 out->qualifier = bts_branch;
546 out->variant.lbr.from = bts_get(at, bts_from);
547 out->variant.lbr.to = bts_get(at, bts_to);
Markus Metzgerd072c252008-12-16 15:53:11 +0100548
549 if (!out->variant.lbr.from && !out->variant.lbr.to)
550 out->qualifier = bts_invalid;
Markus Metzgerc2724772008-12-11 13:49:59 +0100551 }
552
553 return ds_cfg.sizeof_rec[ds_bts];
Markus Metzgereee3af42008-01-30 13:31:09 +0100554}
555
Markus Metzgerc2724772008-12-11 13:49:59 +0100556static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
Markus Metzger93fa7632008-04-08 11:01:58 +0200557{
Markus Metzgerc2724772008-12-11 13:49:59 +0100558 unsigned char raw[MAX_SIZEOF_BTS];
559
Markus Metzgerca0002a2008-11-25 09:01:25 +0100560 if (!tracer)
561 return -EINVAL;
562
Markus Metzgerc2724772008-12-11 13:49:59 +0100563 if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
564 return -EOVERFLOW;
565
566 switch (in->qualifier) {
567 case bts_invalid:
568 bts_set(raw, bts_from, 0);
569 bts_set(raw, bts_to, 0);
570 bts_set(raw, bts_flags, 0);
571 break;
572 case bts_branch:
573 bts_set(raw, bts_from, in->variant.lbr.from);
574 bts_set(raw, bts_to, in->variant.lbr.to);
575 bts_set(raw, bts_flags, 0);
576 break;
577 case bts_task_arrives:
578 case bts_task_departs:
579 bts_set(raw, bts_qual, (bts_escape | in->qualifier));
Markus Metzger15879d02009-04-03 16:43:38 +0200580 bts_set(raw, bts_clock, in->variant.event.clock);
581 bts_set(raw, bts_pid, in->variant.event.pid);
Markus Metzgerc2724772008-12-11 13:49:59 +0100582 break;
583 default:
584 return -EINVAL;
585 }
586
587 return ds_write(tracer->ds.context, ds_bts, raw,
588 ds_cfg.sizeof_rec[ds_bts]);
Markus Metzger93fa7632008-04-08 11:01:58 +0200589}
Markus Metzgereee3af42008-01-30 13:31:09 +0100590
Markus Metzgerc2724772008-12-11 13:49:59 +0100591
592static void ds_write_config(struct ds_context *context,
593 struct ds_trace *cfg, enum ds_qualifier qual)
Markus Metzger93fa7632008-04-08 11:01:58 +0200594{
Markus Metzgerc2724772008-12-11 13:49:59 +0100595 unsigned char *ds = context->ds;
Markus Metzger93fa7632008-04-08 11:01:58 +0200596
Markus Metzgerc2724772008-12-11 13:49:59 +0100597 ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
598 ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
599 ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
600 ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
601}
Markus Metzger93fa7632008-04-08 11:01:58 +0200602
Markus Metzgerc2724772008-12-11 13:49:59 +0100603static void ds_read_config(struct ds_context *context,
604 struct ds_trace *cfg, enum ds_qualifier qual)
605{
606 unsigned char *ds = context->ds;
Markus Metzger93fa7632008-04-08 11:01:58 +0200607
Markus Metzgerc2724772008-12-11 13:49:59 +0100608 cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
609 cfg->top = (void *)ds_get(ds, qual, ds_index);
610 cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
611 cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
612}
613
614static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
615 void *base, size_t size, size_t ith,
616 unsigned int flags) {
617 unsigned long buffer, adj;
618
Markus Metzgerb8e47192009-03-13 10:46:42 +0100619 /*
620 * Adjust the buffer address and size to meet alignment
Markus Metzgerc2724772008-12-11 13:49:59 +0100621 * constraints:
622 * - buffer is double-word aligned
623 * - size is multiple of record size
624 *
625 * We checked the size at the very beginning; we have enough
626 * space to do the adjustment.
627 */
628 buffer = (unsigned long)base;
629
630 adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
631 buffer += adj;
632 size -= adj;
633
634 trace->n = size / ds_cfg.sizeof_rec[qual];
635 trace->size = ds_cfg.sizeof_rec[qual];
636
637 size = (trace->n * trace->size);
638
639 trace->begin = (void *)buffer;
640 trace->top = trace->begin;
641 trace->end = (void *)(buffer + size);
Markus Metzgerb8e47192009-03-13 10:46:42 +0100642 /*
643 * The value for 'no threshold' is -1, which will set the
Markus Metzgerc2724772008-12-11 13:49:59 +0100644 * threshold outside of the buffer, just like we want it.
645 */
Markus Metzgerde79f542009-04-03 16:43:40 +0200646 ith *= ds_cfg.sizeof_rec[qual];
Markus Metzgerc2724772008-12-11 13:49:59 +0100647 trace->ith = (void *)(buffer + size - ith);
648
649 trace->flags = flags;
650}
651
652
653static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
654 enum ds_qualifier qual, struct task_struct *task,
Markus Metzgerde79f542009-04-03 16:43:40 +0200655 int cpu, void *base, size_t size, size_t th)
Markus Metzgerc2724772008-12-11 13:49:59 +0100656{
657 struct ds_context *context;
658 int error;
659
Markus Metzgerbc44fb52009-03-13 10:42:18 +0100660 error = -EOPNOTSUPP;
661 if (!ds_cfg.sizeof_rec[qual])
662 goto out;
663
Markus Metzgerc2724772008-12-11 13:49:59 +0100664 error = -EINVAL;
665 if (!base)
666 goto out;
667
Markus Metzgerde79f542009-04-03 16:43:40 +0200668 /* We need space for alignment adjustments in ds_init_ds_trace(). */
Markus Metzgerc2724772008-12-11 13:49:59 +0100669 error = -EINVAL;
670 if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
671 goto out;
672
673 if (th != (size_t)-1) {
674 th *= ds_cfg.sizeof_rec[qual];
675
676 error = -EINVAL;
677 if (size <= th)
678 goto out;
679 }
680
681 tracer->buffer = base;
682 tracer->size = size;
683
684 error = -ENOMEM;
Markus Metzgerde79f542009-04-03 16:43:40 +0200685 context = ds_get_context(task, cpu);
Markus Metzgerc2724772008-12-11 13:49:59 +0100686 if (!context)
687 goto out;
688 tracer->context = context;
689
Markus Metzgerde79f542009-04-03 16:43:40 +0200690 /*
691 * Defer any tracer-specific initialization work for the context until
692 * context ownership has been clarified.
693 */
Markus Metzgerc2724772008-12-11 13:49:59 +0100694
695 error = 0;
696 out:
697 return error;
698}
699
Markus Metzgerde79f542009-04-03 16:43:40 +0200700static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
701 void *base, size_t size,
702 bts_ovfl_callback_t ovfl, size_t th,
703 unsigned int flags)
Markus Metzgerc2724772008-12-11 13:49:59 +0100704{
705 struct bts_tracer *tracer;
Markus Metzgerc2724772008-12-11 13:49:59 +0100706 int error;
707
Markus Metzgerb8e47192009-03-13 10:46:42 +0100708 /* Buffer overflow notification is not yet implemented. */
Markus Metzgerc2724772008-12-11 13:49:59 +0100709 error = -EOPNOTSUPP;
710 if (ovfl)
711 goto out;
712
Markus Metzger38f80112009-04-03 16:43:37 +0200713 error = get_tracer(task);
714 if (error < 0)
715 goto out;
716
Markus Metzgerc2724772008-12-11 13:49:59 +0100717 error = -ENOMEM;
Markus Metzgerde79f542009-04-03 16:43:40 +0200718 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
Markus Metzgerc2724772008-12-11 13:49:59 +0100719 if (!tracer)
Markus Metzger38f80112009-04-03 16:43:37 +0200720 goto out_put_tracer;
Markus Metzgerc2724772008-12-11 13:49:59 +0100721 tracer->ovfl = ovfl;
722
Markus Metzgerde79f542009-04-03 16:43:40 +0200723 /* Do some more error checking and acquire a tracing context. */
Markus Metzgerc2724772008-12-11 13:49:59 +0100724 error = ds_request(&tracer->ds, &tracer->trace.ds,
Markus Metzgerde79f542009-04-03 16:43:40 +0200725 ds_bts, task, cpu, base, size, th);
Markus Metzgerc2724772008-12-11 13:49:59 +0100726 if (error < 0)
727 goto out_tracer;
728
Markus Metzgerde79f542009-04-03 16:43:40 +0200729 /* Claim the bts part of the tracing context we acquired above. */
730 spin_lock_irq(&ds_lock);
Markus Metzgerc2724772008-12-11 13:49:59 +0100731
732 error = -EPERM;
Markus Metzgerc2724772008-12-11 13:49:59 +0100733 if (tracer->ds.context->bts_master)
Markus Metzger38f80112009-04-03 16:43:37 +0200734 goto out_unlock;
Markus Metzgerc2724772008-12-11 13:49:59 +0100735 tracer->ds.context->bts_master = tracer;
736
Markus Metzgerde79f542009-04-03 16:43:40 +0200737 spin_unlock_irq(&ds_lock);
Markus Metzgerc2724772008-12-11 13:49:59 +0100738
Markus Metzgerde79f542009-04-03 16:43:40 +0200739 /*
740 * Now that we own the bts part of the context, let's complete the
741 * initialization for that part.
742 */
743 ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
744 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
745 ds_install_ds_area(tracer->ds.context);
Markus Metzgerc2724772008-12-11 13:49:59 +0100746
747 tracer->trace.read = bts_read;
748 tracer->trace.write = bts_write;
749
Markus Metzgerde79f542009-04-03 16:43:40 +0200750 /* Start tracing. */
Markus Metzgerc2724772008-12-11 13:49:59 +0100751 ds_resume_bts(tracer);
752
753 return tracer;
754
Markus Metzgerc2724772008-12-11 13:49:59 +0100755 out_unlock:
Markus Metzgerde79f542009-04-03 16:43:40 +0200756 spin_unlock_irq(&ds_lock);
Markus Metzgerc2724772008-12-11 13:49:59 +0100757 ds_put_context(tracer->ds.context);
758 out_tracer:
759 kfree(tracer);
Markus Metzger38f80112009-04-03 16:43:37 +0200760 out_put_tracer:
761 put_tracer(task);
Markus Metzgerc2724772008-12-11 13:49:59 +0100762 out:
763 return ERR_PTR(error);
764}
765
Markus Metzgerde79f542009-04-03 16:43:40 +0200766struct bts_tracer *ds_request_bts_task(struct task_struct *task,
767 void *base, size_t size,
768 bts_ovfl_callback_t ovfl,
769 size_t th, unsigned int flags)
770{
771 return ds_request_bts(task, 0, base, size, ovfl, th, flags);
772}
773
774struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
775 bts_ovfl_callback_t ovfl,
776 size_t th, unsigned int flags)
777{
778 return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
779}
780
781static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
782 void *base, size_t size,
783 pebs_ovfl_callback_t ovfl, size_t th,
784 unsigned int flags)
Markus Metzgerc2724772008-12-11 13:49:59 +0100785{
786 struct pebs_tracer *tracer;
Markus Metzgerc2724772008-12-11 13:49:59 +0100787 int error;
788
Markus Metzgerb8e47192009-03-13 10:46:42 +0100789 /* Buffer overflow notification is not yet implemented. */
Markus Metzgerc2724772008-12-11 13:49:59 +0100790 error = -EOPNOTSUPP;
791 if (ovfl)
792 goto out;
793
Markus Metzger38f80112009-04-03 16:43:37 +0200794 error = get_tracer(task);
795 if (error < 0)
796 goto out;
797
Markus Metzgerc2724772008-12-11 13:49:59 +0100798 error = -ENOMEM;
Markus Metzgerde79f542009-04-03 16:43:40 +0200799 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
Markus Metzgerc2724772008-12-11 13:49:59 +0100800 if (!tracer)
Markus Metzger38f80112009-04-03 16:43:37 +0200801 goto out_put_tracer;
Markus Metzgerc2724772008-12-11 13:49:59 +0100802 tracer->ovfl = ovfl;
803
Markus Metzgerde79f542009-04-03 16:43:40 +0200804 /* Do some more error checking and acquire a tracing context. */
Markus Metzgerc2724772008-12-11 13:49:59 +0100805 error = ds_request(&tracer->ds, &tracer->trace.ds,
Markus Metzgerde79f542009-04-03 16:43:40 +0200806 ds_pebs, task, cpu, base, size, th);
Markus Metzgerc2724772008-12-11 13:49:59 +0100807 if (error < 0)
808 goto out_tracer;
809
Markus Metzgerde79f542009-04-03 16:43:40 +0200810 /* Claim the pebs part of the tracing context we acquired above. */
811 spin_lock_irq(&ds_lock);
Markus Metzgerc2724772008-12-11 13:49:59 +0100812
813 error = -EPERM;
Markus Metzgerc2724772008-12-11 13:49:59 +0100814 if (tracer->ds.context->pebs_master)
Markus Metzger38f80112009-04-03 16:43:37 +0200815 goto out_unlock;
Markus Metzgerc2724772008-12-11 13:49:59 +0100816 tracer->ds.context->pebs_master = tracer;
817
Markus Metzgerde79f542009-04-03 16:43:40 +0200818 spin_unlock_irq(&ds_lock);
Markus Metzgerc2724772008-12-11 13:49:59 +0100819
Markus Metzgerde79f542009-04-03 16:43:40 +0200820 /*
821 * Now that we own the pebs part of the context, let's complete the
822 * initialization for that part.
823 */
824 ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
Markus Metzger73bf1b62009-03-05 08:57:21 +0100825 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
Markus Metzgerde79f542009-04-03 16:43:40 +0200826 ds_install_ds_area(tracer->ds.context);
827
828 /* Start tracing. */
Markus Metzgerc2724772008-12-11 13:49:59 +0100829 ds_resume_pebs(tracer);
830
831 return tracer;
832
Markus Metzgerc2724772008-12-11 13:49:59 +0100833 out_unlock:
Markus Metzgerde79f542009-04-03 16:43:40 +0200834 spin_unlock_irq(&ds_lock);
Markus Metzgerc2724772008-12-11 13:49:59 +0100835 ds_put_context(tracer->ds.context);
836 out_tracer:
837 kfree(tracer);
Markus Metzger38f80112009-04-03 16:43:37 +0200838 out_put_tracer:
839 put_tracer(task);
Markus Metzgerc2724772008-12-11 13:49:59 +0100840 out:
841 return ERR_PTR(error);
842}
843
Markus Metzgerde79f542009-04-03 16:43:40 +0200844struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
845 void *base, size_t size,
846 pebs_ovfl_callback_t ovfl,
847 size_t th, unsigned int flags)
848{
849 return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
850}
851
852struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
853 pebs_ovfl_callback_t ovfl,
854 size_t th, unsigned int flags)
855{
856 return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
857}
858
859static void ds_free_bts(struct bts_tracer *tracer)
Markus Metzgerc2724772008-12-11 13:49:59 +0100860{
Markus Metzger8d99b3a2009-04-03 16:43:36 +0200861 struct task_struct *task;
862
Markus Metzger8d99b3a2009-04-03 16:43:36 +0200863 task = tracer->ds.context->task;
864
Markus Metzgerc2724772008-12-11 13:49:59 +0100865 WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
866 tracer->ds.context->bts_master = NULL;
867
Markus Metzger8d99b3a2009-04-03 16:43:36 +0200868 /* Make sure tracing stopped and the tracer is not in use. */
869 if (task && (task != current))
870 wait_task_context_switch(task);
871
Markus Metzgerc2724772008-12-11 13:49:59 +0100872 ds_put_context(tracer->ds.context);
Markus Metzger38f80112009-04-03 16:43:37 +0200873 put_tracer(task);
Markus Metzgerc2724772008-12-11 13:49:59 +0100874
875 kfree(tracer);
876}
877
/*
 * Stop BTS tracing and release @tracer.
 *
 * Annotated with might_sleep(). A NULL @tracer is accepted and ignored.
 */
void ds_release_bts(struct bts_tracer *tracer)
{
	might_sleep();

	if (tracer) {
		ds_suspend_bts(tracer);
		ds_free_bts(tracer);
	}
}
888
889int ds_release_bts_noirq(struct bts_tracer *tracer)
890{
891 struct task_struct *task;
892 unsigned long irq;
893 int error;
894
895 if (!tracer)
896 return 0;
897
898 task = tracer->ds.context->task;
899
900 local_irq_save(irq);
901
902 error = -EPERM;
903 if (!task &&
904 (tracer->ds.context->cpu != smp_processor_id()))
905 goto out;
906
907 error = -EPERM;
908 if (task && (task != current))
909 goto out;
910
911 ds_suspend_bts_noirq(tracer);
912 ds_free_bts(tracer);
913
914 error = 0;
915 out:
916 local_irq_restore(irq);
917 return error;
918}
919
920static void update_task_debugctlmsr(struct task_struct *task,
921 unsigned long debugctlmsr)
922{
923 task->thread.debugctlmsr = debugctlmsr;
924
925 get_cpu();
926 if (task == current)
927 update_debugctlmsr(debugctlmsr);
928
929 if (task->thread.debugctlmsr)
930 set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
931 else
932 clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
933 put_cpu();
934}
935
Markus Metzgerc2724772008-12-11 13:49:59 +0100936void ds_suspend_bts(struct bts_tracer *tracer)
937{
938 struct task_struct *task;
Markus Metzgerde79f542009-04-03 16:43:40 +0200939 unsigned long debugctlmsr;
940 int cpu;
Markus Metzgerc2724772008-12-11 13:49:59 +0100941
942 if (!tracer)
943 return;
944
Markus Metzgercac94f92009-04-03 16:43:33 +0200945 tracer->flags = 0;
946
Markus Metzgerc2724772008-12-11 13:49:59 +0100947 task = tracer->ds.context->task;
Markus Metzgerde79f542009-04-03 16:43:40 +0200948 cpu = tracer->ds.context->cpu;
Markus Metzgerc2724772008-12-11 13:49:59 +0100949
Markus Metzgerde79f542009-04-03 16:43:40 +0200950 WARN_ON(!task && irqs_disabled());
Markus Metzgerc2724772008-12-11 13:49:59 +0100951
Markus Metzgerde79f542009-04-03 16:43:40 +0200952 debugctlmsr = (task ?
953 task->thread.debugctlmsr :
954 get_debugctlmsr_on_cpu(cpu));
955 debugctlmsr &= ~BTS_CONTROL;
Markus Metzgerc2724772008-12-11 13:49:59 +0100956
Markus Metzgerde79f542009-04-03 16:43:40 +0200957 if (task)
958 update_task_debugctlmsr(task, debugctlmsr);
959 else
960 update_debugctlmsr_on_cpu(cpu, debugctlmsr);
Markus Metzgerc2724772008-12-11 13:49:59 +0100961}
962
Markus Metzgerde79f542009-04-03 16:43:40 +0200963int ds_suspend_bts_noirq(struct bts_tracer *tracer)
Markus Metzgerc2724772008-12-11 13:49:59 +0100964{
965 struct task_struct *task;
Markus Metzgerde79f542009-04-03 16:43:40 +0200966 unsigned long debugctlmsr, irq;
967 int cpu, error = 0;
Markus Metzgerc2724772008-12-11 13:49:59 +0100968
969 if (!tracer)
Markus Metzgerde79f542009-04-03 16:43:40 +0200970 return 0;
Markus Metzgerc2724772008-12-11 13:49:59 +0100971
Markus Metzgerde79f542009-04-03 16:43:40 +0200972 tracer->flags = 0;
Markus Metzgercac94f92009-04-03 16:43:33 +0200973
Markus Metzgerc2724772008-12-11 13:49:59 +0100974 task = tracer->ds.context->task;
Markus Metzgerde79f542009-04-03 16:43:40 +0200975 cpu = tracer->ds.context->cpu;
976
977 local_irq_save(irq);
978
979 error = -EPERM;
980 if (!task && (cpu != smp_processor_id()))
981 goto out;
982
983 debugctlmsr = (task ?
984 task->thread.debugctlmsr :
985 get_debugctlmsr());
986 debugctlmsr &= ~BTS_CONTROL;
987
988 if (task)
989 update_task_debugctlmsr(task, debugctlmsr);
990 else
991 update_debugctlmsr(debugctlmsr);
992
993 error = 0;
994 out:
995 local_irq_restore(irq);
996 return error;
997}
998
999static unsigned long ds_bts_control(struct bts_tracer *tracer)
1000{
1001 unsigned long control;
Markus Metzgerc2724772008-12-11 13:49:59 +01001002
1003 control = ds_cfg.ctl[dsf_bts];
1004 if (!(tracer->trace.ds.flags & BTS_KERNEL))
1005 control |= ds_cfg.ctl[dsf_bts_kernel];
1006 if (!(tracer->trace.ds.flags & BTS_USER))
1007 control |= ds_cfg.ctl[dsf_bts_user];
1008
Markus Metzgerde79f542009-04-03 16:43:40 +02001009 return control;
Markus Metzgerc2724772008-12-11 13:49:59 +01001010}
1011
Markus Metzgerde79f542009-04-03 16:43:40 +02001012void ds_resume_bts(struct bts_tracer *tracer)
Markus Metzgerc2724772008-12-11 13:49:59 +01001013{
Markus Metzger38f80112009-04-03 16:43:37 +02001014 struct task_struct *task;
Markus Metzgerde79f542009-04-03 16:43:40 +02001015 unsigned long debugctlmsr;
1016 int cpu;
Markus Metzger38f80112009-04-03 16:43:37 +02001017
Markus Metzgerc2724772008-12-11 13:49:59 +01001018 if (!tracer)
1019 return;
1020
Markus Metzgerde79f542009-04-03 16:43:40 +02001021 tracer->flags = tracer->trace.ds.flags;
Markus Metzger38f80112009-04-03 16:43:37 +02001022
Markus Metzgerde79f542009-04-03 16:43:40 +02001023 task = tracer->ds.context->task;
1024 cpu = tracer->ds.context->cpu;
1025
1026 WARN_ON(!task && irqs_disabled());
1027
1028 debugctlmsr = (task ?
1029 task->thread.debugctlmsr :
1030 get_debugctlmsr_on_cpu(cpu));
1031 debugctlmsr |= ds_bts_control(tracer);
1032
1033 if (task)
1034 update_task_debugctlmsr(task, debugctlmsr);
1035 else
1036 update_debugctlmsr_on_cpu(cpu, debugctlmsr);
1037}
1038
1039int ds_resume_bts_noirq(struct bts_tracer *tracer)
1040{
1041 struct task_struct *task;
1042 unsigned long debugctlmsr, irq;
1043 int cpu, error = 0;
1044
1045 if (!tracer)
1046 return 0;
1047
1048 tracer->flags = tracer->trace.ds.flags;
1049
1050 task = tracer->ds.context->task;
1051 cpu = tracer->ds.context->cpu;
1052
1053 local_irq_save(irq);
1054
1055 error = -EPERM;
1056 if (!task && (cpu != smp_processor_id()))
1057 goto out;
1058
1059 debugctlmsr = (task ?
1060 task->thread.debugctlmsr :
1061 get_debugctlmsr());
1062 debugctlmsr |= ds_bts_control(tracer);
1063
1064 if (task)
1065 update_task_debugctlmsr(task, debugctlmsr);
1066 else
1067 update_debugctlmsr(debugctlmsr);
1068
1069 error = 0;
1070 out:
1071 local_irq_restore(irq);
1072 return error;
1073}
1074
1075static void ds_free_pebs(struct pebs_tracer *tracer)
1076{
1077 struct task_struct *task;
1078
1079 task = tracer->ds.context->task;
Markus Metzgerc2724772008-12-11 13:49:59 +01001080
1081 WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
1082 tracer->ds.context->pebs_master = NULL;
1083
Markus Metzgerc2724772008-12-11 13:49:59 +01001084 ds_put_context(tracer->ds.context);
Markus Metzger38f80112009-04-03 16:43:37 +02001085 put_tracer(task);
Markus Metzgerc2724772008-12-11 13:49:59 +01001086
1087 kfree(tracer);
1088}
1089
/*
 * Stop PEBS tracing and release @tracer.
 *
 * Annotated with might_sleep(). A NULL @tracer is accepted and ignored.
 */
void ds_release_pebs(struct pebs_tracer *tracer)
{
	might_sleep();

	if (tracer) {
		ds_suspend_pebs(tracer);
		ds_free_pebs(tracer);
	}
}
1100
1101int ds_release_pebs_noirq(struct pebs_tracer *tracer)
1102{
1103 struct task_struct *task;
1104 unsigned long irq;
1105 int error;
1106
1107 if (!tracer)
1108 return 0;
1109
1110 task = tracer->ds.context->task;
1111
1112 local_irq_save(irq);
1113
1114 error = -EPERM;
1115 if (!task &&
1116 (tracer->ds.context->cpu != smp_processor_id()))
1117 goto out;
1118
1119 error = -EPERM;
1120 if (task && (task != current))
1121 goto out;
1122
1123 ds_suspend_pebs_noirq(tracer);
1124 ds_free_pebs(tracer);
1125
1126 error = 0;
1127 out:
1128 local_irq_restore(irq);
1129 return error;
1130}
1131
/* Suspend PEBS tracing for @tracer. Currently a no-op. */
void ds_suspend_pebs(struct pebs_tracer *tracer)
{
	/* Nothing to do. */
}
1136
/*
 * Suspend PEBS tracing for @tracer with interrupts disabled.
 * Currently does nothing and always reports success (0).
 */
int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
{
	/* Nothing to do. */
	return 0;
}
1141
/* Resume PEBS tracing for @tracer. Currently a no-op. */
void ds_resume_pebs(struct pebs_tracer *tracer)
{
	/* Nothing to do. */
}
1146
/*
 * Resume PEBS tracing for @tracer with interrupts disabled.
 * Currently does nothing and always reports success (0).
 */
int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
{
	/* Nothing to do. */
	return 0;
}
1151
Markus Metzgerc2724772008-12-11 13:49:59 +01001152const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
1153{
1154 if (!tracer)
1155 return NULL;
1156
1157 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
1158 return &tracer->trace;
1159}
1160
1161const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
1162{
1163 if (!tracer)
1164 return NULL;
1165
1166 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
1167 tracer->trace.reset_value =
Markus Metzgerbc44fb52009-03-13 10:42:18 +01001168 *(u64 *)(tracer->ds.context->ds +
1169 (ds_cfg.sizeof_ptr_field * 8));
Markus Metzgerc2724772008-12-11 13:49:59 +01001170
1171 return &tracer->trace;
Markus Metzger93fa7632008-04-08 11:01:58 +02001172}
1173
Markus Metzgerca0002a2008-11-25 09:01:25 +01001174int ds_reset_bts(struct bts_tracer *tracer)
Markus Metzger93fa7632008-04-08 11:01:58 +02001175{
Markus Metzgerca0002a2008-11-25 09:01:25 +01001176 if (!tracer)
1177 return -EINVAL;
1178
Markus Metzgerc2724772008-12-11 13:49:59 +01001179 tracer->trace.ds.top = tracer->trace.ds.begin;
1180
1181 ds_set(tracer->ds.context->ds, ds_bts, ds_index,
1182 (unsigned long)tracer->trace.ds.top);
Markus Metzgerca0002a2008-11-25 09:01:25 +01001183
1184 return 0;
Markus Metzger93fa7632008-04-08 11:01:58 +02001185}
1186
Markus Metzgerca0002a2008-11-25 09:01:25 +01001187int ds_reset_pebs(struct pebs_tracer *tracer)
Markus Metzger93fa7632008-04-08 11:01:58 +02001188{
Markus Metzgerca0002a2008-11-25 09:01:25 +01001189 if (!tracer)
1190 return -EINVAL;
1191
Markus Metzgerc2724772008-12-11 13:49:59 +01001192 tracer->trace.ds.top = tracer->trace.ds.begin;
Markus Metzgerca0002a2008-11-25 09:01:25 +01001193
Markus Metzgerc2724772008-12-11 13:49:59 +01001194 ds_set(tracer->ds.context->ds, ds_bts, ds_index,
1195 (unsigned long)tracer->trace.ds.top);
Markus Metzger93fa7632008-04-08 11:01:58 +02001196
Markus Metzgerca0002a2008-11-25 09:01:25 +01001197 return 0;
Markus Metzger93fa7632008-04-08 11:01:58 +02001198}
1199
Markus Metzgerca0002a2008-11-25 09:01:25 +01001200int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
Markus Metzger93fa7632008-04-08 11:01:58 +02001201{
Markus Metzgerca0002a2008-11-25 09:01:25 +01001202 if (!tracer)
1203 return -EINVAL;
Markus Metzger93fa7632008-04-08 11:01:58 +02001204
Markus Metzgerbc44fb52009-03-13 10:42:18 +01001205 *(u64 *)(tracer->ds.context->ds +
1206 (ds_cfg.sizeof_ptr_field * 8)) = value;
Markus Metzger93fa7632008-04-08 11:01:58 +02001207
Markus Metzgerca0002a2008-11-25 09:01:25 +01001208 return 0;
Markus Metzger93fa7632008-04-08 11:01:58 +02001209}
1210
Markus Metzgerc2724772008-12-11 13:49:59 +01001211static const struct ds_configuration ds_cfg_netburst = {
Markus Metzgerba2607f2009-01-19 10:38:35 +01001212 .name = "Netburst",
Markus Metzgerc2724772008-12-11 13:49:59 +01001213 .ctl[dsf_bts] = (1 << 2) | (1 << 3),
1214 .ctl[dsf_bts_kernel] = (1 << 5),
1215 .ctl[dsf_bts_user] = (1 << 6),
Markus Metzger93fa7632008-04-08 11:01:58 +02001216};
Markus Metzgerc2724772008-12-11 13:49:59 +01001217static const struct ds_configuration ds_cfg_pentium_m = {
Markus Metzgerba2607f2009-01-19 10:38:35 +01001218 .name = "Pentium M",
Markus Metzgerc2724772008-12-11 13:49:59 +01001219 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
Markus Metzgereee3af42008-01-30 13:31:09 +01001220};
Markus Metzgerba2607f2009-01-19 10:38:35 +01001221static const struct ds_configuration ds_cfg_core2_atom = {
1222 .name = "Core 2/Atom",
Markus Metzgerc2724772008-12-11 13:49:59 +01001223 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
1224 .ctl[dsf_bts_kernel] = (1 << 9),
1225 .ctl[dsf_bts_user] = (1 << 10),
Markus Metzgerc2724772008-12-11 13:49:59 +01001226};
Markus Metzgereee3af42008-01-30 13:31:09 +01001227
Markus Metzgerc2724772008-12-11 13:49:59 +01001228static void
Markus Metzgerbc44fb52009-03-13 10:42:18 +01001229ds_configure(const struct ds_configuration *cfg,
1230 struct cpuinfo_x86 *cpu)
Markus Metzgereee3af42008-01-30 13:31:09 +01001231{
Markus Metzgerbc44fb52009-03-13 10:42:18 +01001232 unsigned long nr_pebs_fields = 0;
1233
1234 printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);
1235
1236#ifdef __i386__
1237 nr_pebs_fields = 10;
1238#else
1239 nr_pebs_fields = 18;
1240#endif
1241
Markus Metzgerc2724772008-12-11 13:49:59 +01001242 memset(&ds_cfg, 0, sizeof(ds_cfg));
Markus Metzgereee3af42008-01-30 13:31:09 +01001243 ds_cfg = *cfg;
Markus Metzgerca0002a2008-11-25 09:01:25 +01001244
Markus Metzgerbc44fb52009-03-13 10:42:18 +01001245 ds_cfg.sizeof_ptr_field =
1246 (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);
Markus Metzgerca0002a2008-11-25 09:01:25 +01001247
Markus Metzgerbc44fb52009-03-13 10:42:18 +01001248 ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3;
1249 ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;
1250
1251 if (!cpu_has(cpu, X86_FEATURE_BTS)) {
1252 ds_cfg.sizeof_rec[ds_bts] = 0;
Markus Metzgerc2724772008-12-11 13:49:59 +01001253 printk(KERN_INFO "[ds] bts not available\n");
1254 }
Markus Metzgerbc44fb52009-03-13 10:42:18 +01001255 if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
1256 ds_cfg.sizeof_rec[ds_pebs] = 0;
Markus Metzgerc2724772008-12-11 13:49:59 +01001257 printk(KERN_INFO "[ds] pebs not available\n");
Markus Metzgerbc44fb52009-03-13 10:42:18 +01001258 }
1259
1260 printk(KERN_INFO "[ds] sizes: address: %u bit, ",
1261 8 * ds_cfg.sizeof_ptr_field);
1262 printk("bts/pebs record: %u/%u bytes\n",
1263 ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);
Markus Metzgerc2724772008-12-11 13:49:59 +01001264
Ingo Molnar79258a32009-03-13 12:02:08 +01001265 WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_ptr_field));
Markus Metzgereee3af42008-01-30 13:31:09 +01001266}
1267
1268void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
1269{
Markus Metzgeree811512009-04-03 16:43:47 +02001270 /* Only configure the first cpu. Others are identical. */
1271 if (ds_cfg.name)
1272 return;
1273
Markus Metzgereee3af42008-01-30 13:31:09 +01001274 switch (c->x86) {
1275 case 0x6:
1276 switch (c->x86_model) {
Markus Metzgerba2607f2009-01-19 10:38:35 +01001277 case 0x9:
1278 case 0xd: /* Pentium M */
Markus Metzgerbc44fb52009-03-13 10:42:18 +01001279 ds_configure(&ds_cfg_pentium_m, c);
Markus Metzgereee3af42008-01-30 13:31:09 +01001280 break;
Markus Metzgerba2607f2009-01-19 10:38:35 +01001281 case 0xf:
1282 case 0x17: /* Core2 */
1283 case 0x1c: /* Atom */
Markus Metzgerbc44fb52009-03-13 10:42:18 +01001284 ds_configure(&ds_cfg_core2_atom, c);
Markus Metzgerba2607f2009-01-19 10:38:35 +01001285 break;
Markus Metzgerb8e47192009-03-13 10:46:42 +01001286 case 0x1a: /* Core i7 */
Markus Metzgerba2607f2009-01-19 10:38:35 +01001287 default:
Markus Metzgerb8e47192009-03-13 10:46:42 +01001288 /* Sorry, don't know about them. */
Markus Metzgereee3af42008-01-30 13:31:09 +01001289 break;
Markus Metzgereee3af42008-01-30 13:31:09 +01001290 }
1291 break;
Markus Metzgerba2607f2009-01-19 10:38:35 +01001292 case 0xf:
Markus Metzgereee3af42008-01-30 13:31:09 +01001293 switch (c->x86_model) {
Markus Metzgereee3af42008-01-30 13:31:09 +01001294 case 0x0:
1295 case 0x1:
1296 case 0x2: /* Netburst */
Markus Metzgerbc44fb52009-03-13 10:42:18 +01001297 ds_configure(&ds_cfg_netburst, c);
Markus Metzgereee3af42008-01-30 13:31:09 +01001298 break;
Markus Metzgereee3af42008-01-30 13:31:09 +01001299 default:
Markus Metzgerb8e47192009-03-13 10:46:42 +01001300 /* Sorry, don't know about them. */
Markus Metzgereee3af42008-01-30 13:31:09 +01001301 break;
1302 }
1303 break;
1304 default:
Markus Metzgerb8e47192009-03-13 10:46:42 +01001305 /* Sorry, don't know about them. */
Markus Metzgereee3af42008-01-30 13:31:09 +01001306 break;
1307 }
1308}
Markus Metzger93fa7632008-04-08 11:01:58 +02001309
Markus Metzgercac94f92009-04-03 16:43:33 +02001310static inline void ds_take_timestamp(struct ds_context *context,
1311 enum bts_qualifier qualifier,
1312 struct task_struct *task)
1313{
1314 struct bts_tracer *tracer = context->bts_master;
1315 struct bts_struct ts;
1316
1317 /* Prevent compilers from reading the tracer pointer twice. */
1318 barrier();
1319
1320 if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
1321 return;
1322
1323 memset(&ts, 0, sizeof(ts));
Markus Metzger15879d02009-04-03 16:43:38 +02001324 ts.qualifier = qualifier;
1325 ts.variant.event.clock = trace_clock_global();
1326 ts.variant.event.pid = task->pid;
Markus Metzgercac94f92009-04-03 16:43:33 +02001327
1328 bts_write(tracer, &ts);
1329}
1330
Markus Metzgerc2724772008-12-11 13:49:59 +01001331/*
1332 * Change the DS configuration from tracing prev to tracing next.
1333 */
1334void ds_switch_to(struct task_struct *prev, struct task_struct *next)
Markus Metzger93fa7632008-04-08 11:01:58 +02001335{
Markus Metzgercac94f92009-04-03 16:43:33 +02001336 struct ds_context *prev_ctx = prev->thread.ds_ctx;
1337 struct ds_context *next_ctx = next->thread.ds_ctx;
1338 unsigned long debugctlmsr = next->thread.debugctlmsr;
1339
1340 /* Make sure all data is read before we start. */
1341 barrier();
Markus Metzgerc2724772008-12-11 13:49:59 +01001342
1343 if (prev_ctx) {
1344 update_debugctlmsr(0);
1345
Markus Metzgercac94f92009-04-03 16:43:33 +02001346 ds_take_timestamp(prev_ctx, bts_task_departs, prev);
Markus Metzgerca0002a2008-11-25 09:01:25 +01001347 }
Markus Metzgerc2724772008-12-11 13:49:59 +01001348
1349 if (next_ctx) {
Markus Metzgercac94f92009-04-03 16:43:33 +02001350 ds_take_timestamp(next_ctx, bts_task_arrives, next);
Markus Metzgerc2724772008-12-11 13:49:59 +01001351
1352 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
1353 }
1354
Markus Metzgercac94f92009-04-03 16:43:33 +02001355 update_debugctlmsr(debugctlmsr);
Markus Metzger93fa7632008-04-08 11:01:58 +02001356}
Markus Metzgerbf53de92008-12-19 15:10:24 +01001357
Markus Metzgerde79f542009-04-03 16:43:40 +02001358static __init int ds_selftest(void)
1359{
1360 if (ds_cfg.sizeof_rec[ds_bts]) {
1361 int error;
1362
1363 error = ds_selftest_bts();
1364 if (error) {
1365 WARN(1, "[ds] selftest failed. disabling bts.\n");
1366 ds_cfg.sizeof_rec[ds_bts] = 0;
1367 }
1368 }
1369
1370 if (ds_cfg.sizeof_rec[ds_pebs]) {
1371 int error;
1372
1373 error = ds_selftest_pebs();
1374 if (error) {
1375 WARN(1, "[ds] selftest failed. disabling pebs.\n");
1376 ds_cfg.sizeof_rec[ds_pebs] = 0;
1377 }
1378 }
1379
1380 return 0;
1381}
1382device_initcall(ds_selftest);