/*
 * VMI paravirtual timer support routines.
 *
 * Copyright (C) 2005, VMware, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Send feedback to dhecht@vmware.com
 *
 */

/*
 * Portions of this code from arch/i386/kernel/timers/timer_tsc.c.
 * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c.
 * See comments there for proper credits.
 */
30
31#include <linux/spinlock.h>
32#include <linux/init.h>
33#include <linux/errno.h>
34#include <linux/jiffies.h>
35#include <linux/interrupt.h>
36#include <linux/kernel_stat.h>
37#include <linux/rcupdate.h>
38#include <linux/clocksource.h>
39
40#include <asm/timer.h>
41#include <asm/io.h>
42#include <asm/apic.h>
43#include <asm/div64.h>
44#include <asm/timer.h>
45#include <asm/desc.h>
46
47#include <asm/vmi.h>
48#include <asm/vmi_time.h>
49
50#include <mach_timer.h>
51#include <io_ports.h>
52
/*
 * How the VMI alarm is delivered once the system is up: through IRQ0 when
 * there is no local APIC support, otherwise through the local APIC timer
 * LVT entry (LVTT).
 */
#ifndef CONFIG_X86_LOCAL_APIC
#define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0
#else
#define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT
#endif
58
59/* Cached VMI operations */
60struct vmi_timer_ops vmi_timer_ops;
61
62#ifdef CONFIG_NO_IDLE_HZ
63
64/* /proc/sys/kernel/hz_timer state. */
65int sysctl_hz_timer;
66
67/* Some stats */
68static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs);
69static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies);
70static DEFINE_PER_CPU(unsigned long, idle_start_jiffies);
71
72#endif /* CONFIG_NO_IDLE_HZ */
73
74/* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */
75static int alarm_hz = CONFIG_VMI_ALARM_HZ;
76
77/* Cache of the value get_cycle_frequency / HZ. */
78static signed long long cycles_per_jiffy;
79
80/* Cache of the value get_cycle_frequency / alarm_hz. */
81static signed long long cycles_per_alarm;
82
83/* The number of cycles accounted for by the 'jiffies'/'xtime' count.
84 * Protected by xtime_lock. */
85static unsigned long long real_cycles_accounted_system;
86
87/* The number of cycles accounted for by update_process_times(), per cpu. */
88static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu);
89
90/* The number of stolen cycles accounted, per cpu. */
91static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu);
92
93/* Clock source. */
94static cycle_t read_real_cycles(void)
95{
96 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
97}
98
99static cycle_t read_available_cycles(void)
100{
101 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
102}
103
#if 0
/* Currently compiled out: sample the VMI_CYCLES_STOLEN cycle counter. */
static cycle_t read_stolen_cycles(void)
{
	cycle_t stolen_now = vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN);

	return stolen_now;
}
#endif /* 0 */
110
111static struct clocksource clocksource_vmi = {
112 .name = "vmi-timer",
113 .rating = 450,
114 .read = read_real_cycles,
115 .mask = CLOCKSOURCE_MASK(64),
116 .mult = 0, /* to be set */
117 .shift = 22,
Thomas Gleixner73b08d22007-02-16 01:27:36 -0800118 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100119};
120
121
122/* Timer interrupt handler. */
123static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);
124
125static struct irqaction vmi_timer_irq = {
126 vmi_timer_interrupt,
127 SA_INTERRUPT,
128 CPU_MASK_NONE,
129 "VMI-alarm",
130 NULL,
131 NULL
132};
133
134/* Alarm rate */
135static int __init vmi_timer_alarm_rate_setup(char* str)
136{
137 int alarm_rate;
138 if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) {
139 alarm_hz = alarm_rate;
140 printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz);
141 }
142 return 1;
143}
144__setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup);
145
146
147/* Initialization */
148static void vmi_get_wallclock_ts(struct timespec *ts)
149{
150 unsigned long long wallclock;
151 wallclock = vmi_timer_ops.get_wallclock(); // nsec units
152 ts->tv_nsec = do_div(wallclock, 1000000000);
153 ts->tv_sec = wallclock;
154}
155
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100156unsigned long vmi_get_wallclock(void)
157{
158 struct timespec ts;
159 vmi_get_wallclock_ts(&ts);
160 return ts.tv_sec;
161}
162
/*
 * Setting the wallclock is not implemented here: the requested time is
 * ignored and -1 is always returned.
 */
int vmi_set_wallclock(unsigned long now)
{
	const int failure = -1;

	return failure;
}
167
Zachary Amsden6cb9a832007-03-05 00:30:35 -0800168unsigned long long vmi_get_sched_cycles(void)
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100169{
170 return read_available_cycles();
171}
172
Zachary Amsden1182d852007-03-05 00:30:36 -0800173unsigned long vmi_cpu_khz(void)
174{
175 unsigned long long khz;
176
177 khz = vmi_timer_ops.get_cycle_frequency();
178 (void)do_div(khz, 1000);
179 return khz;
180}
181
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100182void __init vmi_time_init(void)
183{
184 unsigned long long cycles_per_sec, cycles_per_msec;
Zachary Amsden90736e22007-02-13 13:26:21 +0100185 unsigned long flags;
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100186
Zachary Amsden90736e22007-02-13 13:26:21 +0100187 local_irq_save(flags);
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100188 setup_irq(0, &vmi_timer_irq);
189#ifdef CONFIG_X86_LOCAL_APIC
190 set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
191#endif
192
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100193 real_cycles_accounted_system = read_real_cycles();
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100194 per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();
195
196 cycles_per_sec = vmi_timer_ops.get_cycle_frequency();
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100197 cycles_per_jiffy = cycles_per_sec;
198 (void)do_div(cycles_per_jiffy, HZ);
199 cycles_per_alarm = cycles_per_sec;
200 (void)do_div(cycles_per_alarm, alarm_hz);
201 cycles_per_msec = cycles_per_sec;
202 (void)do_div(cycles_per_msec, 1000);
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100203
204 printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;"
205 "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
206 cycles_per_alarm);
207
208 clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
209 clocksource_vmi.shift);
210 if (clocksource_register(&clocksource_vmi))
211 printk(KERN_WARNING "Error registering VMITIME clocksource.");
212
213 /* Disable PIT. */
214 outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
215
216 /* schedule the alarm. do this in phase with process_times_cycles_accounted_cpu
217 * reduce the latency calling update_process_times. */
218 vmi_timer_ops.set_alarm(
219 VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
220 per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
221 cycles_per_alarm);
Zachary Amsden90736e22007-02-13 13:26:21 +0100222
223 local_irq_restore(flags);
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100224}
225
226#ifdef CONFIG_X86_LOCAL_APIC
227
228void __init vmi_timer_setup_boot_alarm(void)
229{
230 local_irq_disable();
231
232 /* Route the interrupt to the correct vector. */
233 apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
234
235 /* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */
236 vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
237 vmi_timer_ops.set_alarm(
238 VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
239 per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
240 cycles_per_alarm);
241 local_irq_enable();
242}
243
244/* Initialize the time accounting variables for an AP on an SMP system.
245 * Also, set the local alarm for the AP. */
Zachary Amsdenc6b36e92007-03-05 00:30:43 -0800246void __devinit vmi_timer_setup_secondary_alarm(void)
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100247{
248 int cpu = smp_processor_id();
249
250 /* Route the interrupt to the correct vector. */
251 apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
252
253 per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles();
254
255 vmi_timer_ops.set_alarm(
256 VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
257 per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
258 cycles_per_alarm);
259}
260
261#endif
262
263/* Update system wide (real) time accounting (e.g. jiffies, xtime). */
264static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
265{
266 long long cycles_not_accounted;
267
268 write_seqlock(&xtime_lock);
269
270 cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
271 while (cycles_not_accounted >= cycles_per_jiffy) {
Zachary Amsden7507ba32007-03-05 00:30:34 -0800272 /* systems wide jiffies. */
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100273 do_timer(1);
274
275 cycles_not_accounted -= cycles_per_jiffy;
276 real_cycles_accounted_system += cycles_per_jiffy;
277 }
278
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100279 write_sequnlock(&xtime_lock);
280}
281
282/* Update per-cpu process times. */
283static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu,
284 unsigned long long cur_process_times_cycles)
285{
286 long long cycles_not_accounted;
287 cycles_not_accounted = cur_process_times_cycles -
288 per_cpu(process_times_cycles_accounted_cpu, cpu);
289
290 while (cycles_not_accounted >= cycles_per_jiffy) {
291 /* Account time to the current process. This includes
292 * calling into the scheduler to decrement the timeslice
293 * and possibly reschedule.*/
294 update_process_times(user_mode(regs));
295 /* XXX handle /proc/profile multiplier. */
296 profile_tick(CPU_PROFILING);
297
298 cycles_not_accounted -= cycles_per_jiffy;
299 per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
300 }
301}
302
#ifdef CONFIG_NO_IDLE_HZ
/*
 * Update per-cpu idle times.  Used when a no-hz halt is ended.
 *
 * Counts the whole jiffies of available cycles that have not yet been
 * accounted on @cpu, advances the per-cpu accounted marker past them, and
 * charges them in a single call against the cpu's idle task.
 */
static void vmi_account_no_hz_idle_cycles(int cpu,
					  unsigned long long cur_process_times_cycles)
{
	unsigned long idle_jiffies;
	long long pending;

	pending = cur_process_times_cycles -
		  per_cpu(process_times_cycles_accounted_cpu, cpu);

	for (idle_jiffies = 0; pending >= cycles_per_jiffy; idle_jiffies++) {
		pending -= cycles_per_jiffy;
		per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
	}

	/* Account time to the idle process. */
	account_steal_time(idle_task(cpu), jiffies_to_cputime(idle_jiffies));
}
#endif
323
324/* Update per-cpu stolen time. */
325static void vmi_account_stolen_cycles(int cpu,
326 unsigned long long cur_real_cycles,
327 unsigned long long cur_avail_cycles)
328{
329 long long stolen_cycles_not_accounted;
330 unsigned long stolen_jiffies = 0;
331
332 if (cur_real_cycles < cur_avail_cycles)
333 return;
334
335 stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles -
336 per_cpu(stolen_cycles_accounted_cpu, cpu);
337
338 while (stolen_cycles_not_accounted >= cycles_per_jiffy) {
339 stolen_jiffies++;
340 stolen_cycles_not_accounted -= cycles_per_jiffy;
341 per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
342 }
343 /* HACK: pass NULL to force time onto cpustat->steal. */
344 account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies));
345}
346
/*
 * Body of either IRQ0 interrupt handler (UP no local-APIC) or
 * local-APIC LVTT interrupt handler (UP & local-APIC or SMP).
 *
 * Samples the real and available cycle counters once, then runs the three
 * accounting passes (system time, process time, stolen time) against those
 * same two samples.
 */
static void vmi_local_timer_interrupt(int cpu)
{
	unsigned long long real_now, avail_now;

	real_now = read_real_cycles();
	avail_now = read_available_cycles();

	/* System wide (real) time state: xtime, jiffies. */
	vmi_account_real_cycles(real_now);

	/* Per-cpu process times, charged against the interrupted context. */
	vmi_account_process_times_cycles(get_irq_regs(), cpu, avail_now);

	/* Time stolen from this cpu by the hypervisor. */
	vmi_account_stolen_cycles(cpu, real_now, avail_now);
}
362
363#ifdef CONFIG_NO_IDLE_HZ
364
365/* Must be called only from idle loop, with interrupts disabled. */
366int vmi_stop_hz_timer(void)
367{
368 /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */
369
370 unsigned long seq, next;
371 unsigned long long real_cycles_expiry;
372 int cpu = smp_processor_id();
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100373
374 BUG_ON(!irqs_disabled());
375 if (sysctl_hz_timer != 0)
376 return 0;
377
378 cpu_set(cpu, nohz_cpu_mask);
379 smp_mb();
Zachary Amsden7507ba32007-03-05 00:30:34 -0800380
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100381 if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
Zachary Amsden7507ba32007-03-05 00:30:34 -0800382 (next = next_timer_interrupt(),
383 time_before_eq(next, jiffies + HZ/CONFIG_VMI_ALARM_HZ))) {
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100384 cpu_clear(cpu, nohz_cpu_mask);
Zachary Amsden7507ba32007-03-05 00:30:34 -0800385 return 0;
386 }
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100387
388 /* Convert jiffies to the real cycle counter. */
389 do {
390 seq = read_seqbegin(&xtime_lock);
391 real_cycles_expiry = real_cycles_accounted_system +
392 (long)(next - jiffies) * cycles_per_jiffy;
393 } while (read_seqretry(&xtime_lock, seq));
394
395 /* This cpu is going idle. Disable the periodic alarm. */
Zachary Amsden7507ba32007-03-05 00:30:34 -0800396 vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
397 per_cpu(idle_start_jiffies, cpu) = jiffies;
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100398 /* Set the real time alarm to expire at the next event. */
399 vmi_timer_ops.set_alarm(
Zachary Amsden7507ba32007-03-05 00:30:34 -0800400 VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
401 real_cycles_expiry, 0);
402 return 1;
Zachary Amsdenbbab4f32007-02-13 13:26:21 +0100403}
404
405static void vmi_reenable_hz_timer(int cpu)
406{
407 /* For /proc/vmi/info idle_hz stat. */
408 per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu);
409 per_cpu(vmi_idle_no_hz_irqs, cpu)++;
410
411 /* Don't bother explicitly cancelling the one-shot alarm -- at
412 * worse we will receive a spurious timer interrupt. */
413 vmi_timer_ops.set_alarm(
414 VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
415 per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
416 cycles_per_alarm);
417 /* Indicate this cpu is no longer nohz idle. */
418 cpu_clear(cpu, nohz_cpu_mask);
419}
420
/*
 * Called from interrupt handlers when (local) HZ timer is disabled.
 *
 * Accounts the time that passed while the periodic alarm was stopped
 * (system time, idle time, stolen time) from one pair of counter samples,
 * then re-arms the periodic alarm.  Interrupts must be disabled.
 */
void vmi_account_time_restart_hz_timer(void)
{
	unsigned long long real_now, avail_now;
	int cpu = smp_processor_id();

	BUG_ON(!irqs_disabled());

	/* Account the time during which the HZ timer was disabled. */
	real_now = read_real_cycles();
	avail_now = read_available_cycles();

	/* System wide (real) time state: xtime, jiffies. */
	vmi_account_real_cycles(real_now);
	/* Per-cpu idle times. */
	vmi_account_no_hz_idle_cycles(cpu, avail_now);
	/* Time stolen from this cpu by the hypervisor. */
	vmi_account_stolen_cycles(cpu, real_now, avail_now);

	/* Reenable the hz timer. */
	vmi_reenable_hz_timer(cpu);
}
440
441#endif /* CONFIG_NO_IDLE_HZ */
442
443/* UP (and no local-APIC) VMI-timer alarm interrupt handler.
444 * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after
445 * APIC setup and setup_boot_vmi_alarm() is called. */
446static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
447{
448 vmi_local_timer_interrupt(smp_processor_id());
449 return IRQ_HANDLED;
450}
451
#ifdef CONFIG_X86_LOCAL_APIC

/*
 * SMP VMI-timer alarm interrupt handler.  Handler for LVTT vector.
 * Also used in UP when CONFIG_X86_LOCAL_APIC.
 * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt.
 *
 * @regs is installed as the current irq register set for the duration of
 * the handler and the previous set is restored before returning.
 */
void smp_apic_vmi_timer_interrupt(struct pt_regs *regs)
{
	struct pt_regs *saved_regs;
	int cpu;

	saved_regs = set_irq_regs(regs);
	cpu = smp_processor_id();

	/* The NMI deadlock-detector uses this counter. */
	per_cpu(irq_stat, cpu).apic_timer_irqs++;

	/* ACK the irq immediately, because timer handling can be slow. */
	ack_APIC_irq();

	/*
	 * update_process_times() expects us to have done irq_enter().
	 * Besides, if we don't, timer interrupts ignore the global
	 * interrupt lock, which is the WrongThing (tm) to do.
	 */
	irq_enter();
	vmi_local_timer_interrupt(cpu);
	irq_exit();

	set_irq_regs(saved_regs);
}

#endif /* CONFIG_X86_LOCAL_APIC */