blob: 76857add2f5149440c40653cd9b8bc160298c9e7 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/x86-64/kernel/time.c
3 *
4 * "High Precision Event Timer" based timekeeping.
5 *
6 * Copyright (c) 1991,1992,1995 Linus Torvalds
7 * Copyright (c) 1994 Alan Modra
8 * Copyright (c) 1995 Markus Kuhn
9 * Copyright (c) 1996 Ingo Molnar
10 * Copyright (c) 1998 Andrea Arcangeli
Vojtech Pavlik2f82bde42006-06-26 13:58:38 +020011 * Copyright (c) 2002,2006 Vojtech Pavlik
Linus Torvalds1da177e2005-04-16 15:20:36 -070012 * Copyright (c) 2003 Andi Kleen
13 * RTC support code taken from arch/i386/kernel/timers/time_hpet.c
14 */
15
16#include <linux/kernel.h>
17#include <linux/sched.h>
18#include <linux/interrupt.h>
19#include <linux/init.h>
20#include <linux/mc146818rtc.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/time.h>
22#include <linux/ioport.h>
23#include <linux/module.h>
24#include <linux/device.h>
25#include <linux/sysdev.h>
26#include <linux/bcd.h>
Vojtech Pavlika670fad2006-09-26 10:52:28 +020027#include <linux/notifier.h>
28#include <linux/cpu.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <linux/kallsyms.h>
Andi Kleen312df5f2005-05-16 21:53:28 -070030#include <linux/acpi.h>
Andi Kleen8d916402005-05-31 14:39:26 -070031#ifdef CONFIG_ACPI
Andi Kleen312df5f2005-05-16 21:53:28 -070032#include <acpi/achware.h> /* for PM timer frequency */
Andi Kleen0e5f61b2006-07-29 21:42:37 +020033#include <acpi/acpi_bus.h>
Andi Kleen8d916402005-05-31 14:39:26 -070034#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <asm/8253pit.h>
36#include <asm/pgtable.h>
37#include <asm/vsyscall.h>
38#include <asm/timex.h>
39#include <asm/proto.h>
40#include <asm/hpet.h>
41#include <asm/sections.h>
42#include <linux/cpufreq.h>
43#include <linux/hpet.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044#include <asm/apic.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045
Linus Torvalds1da177e2005-04-16 15:20:36 -070046#ifdef CONFIG_CPU_FREQ
47static void cpufreq_delayed_get(void);
48#endif
49extern void i8254_timer_resume(void);
50extern int using_apic_timer;
51
Vojtech Pavlika670fad2006-09-26 10:52:28 +020052static char *timename = NULL;
Andi Kleene8b91772006-02-26 04:18:49 +010053
Linus Torvalds1da177e2005-04-16 15:20:36 -070054DEFINE_SPINLOCK(rtc_lock);
Andi Kleen2ee60e172006-06-26 13:59:44 +020055EXPORT_SYMBOL(rtc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070056DEFINE_SPINLOCK(i8253_lock);
57
Andi Kleen73dea472006-02-03 21:50:50 +010058int nohpet __initdata = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -070059static int notsc __initdata = 0;
60
Vojtech Pavlik42211332006-06-26 13:58:32 +020061#define USEC_PER_TICK (USEC_PER_SEC / HZ)
62#define NSEC_PER_TICK (NSEC_PER_SEC / HZ)
63#define FSEC_PER_TICK (FSEC_PER_SEC / HZ)
64
65#define NS_SCALE 10 /* 2^10, carefully chosen */
 66#define US_SCALE 32 /* 2^32, arbitrarily chosen */
67
Linus Torvalds1da177e2005-04-16 15:20:36 -070068unsigned int cpu_khz; /* TSC clocks / usec, not used here */
Andi Kleen2ee60e172006-06-26 13:59:44 +020069EXPORT_SYMBOL(cpu_khz);
john stultz2d0c87c2007-02-16 01:28:18 -080070unsigned long hpet_address;
Linus Torvalds1da177e2005-04-16 15:20:36 -070071static unsigned long hpet_period; /* fsecs / HPET clock */
72unsigned long hpet_tick; /* HPET clocks / interrupt */
Chris McDermott33042a92006-02-11 17:55:50 -080073int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */
Linus Torvalds1da177e2005-04-16 15:20:36 -070074unsigned long vxtime_hz = PIT_TICK_RATE;
75int report_lost_ticks; /* command line option */
76unsigned long long monotonic_base;
77
78struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
79
80volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
Linus Torvalds1da177e2005-04-16 15:20:36 -070081struct timespec __xtime __section_xtime;
82struct timezone __sys_tz __section_sys_tz;
83
Linus Torvalds1da177e2005-04-16 15:20:36 -070084/*
85 * do_gettimeoffset() returns microseconds since last timer interrupt was
86 * triggered by hardware. A memory read of HPET is slower than a register read
87 * of TSC, but much more reliable. It's also synchronized to the timer
88 * interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a
89 * timer interrupt has happened already, but vxtime.trigger wasn't updated yet.
90 * This is not a problem, because jiffies hasn't updated either. They are bound
91 * together by xtime_lock.
92 */
93
94static inline unsigned int do_gettimeoffset_tsc(void)
95{
96 unsigned long t;
97 unsigned long x;
Andi Kleenc818a182006-01-11 22:45:24 +010098 t = get_cycles_sync();
Andi Kleen7351c0b2006-03-25 16:30:34 +010099 if (t < vxtime.last_tsc)
100 t = vxtime.last_tsc; /* hack */
Vojtech Pavlik42211332006-06-26 13:58:32 +0200101 x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102 return x;
103}
104
105static inline unsigned int do_gettimeoffset_hpet(void)
106{
john stultza3a00752005-06-23 00:08:36 -0700107 /* cap counter read to one tick to avoid inconsistencies */
108 unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
Vojtech Pavlik42211332006-06-26 13:58:32 +0200109 return (min(counter,hpet_tick) * vxtime.quot) >> US_SCALE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700110}
111
112unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
113
114/*
115 * This version of gettimeofday() has microsecond resolution and better than
116 * microsecond precision, as we're using at least a 10 MHz (usually 14.31818
117 * MHz) HPET timer.
118 */
119
/*
 * Fill *tv with the current wall-clock time.  Reads xtime plus the
 * per-tick offset from do_gettimeoffset() under the xtime seqlock,
 * retrying if a timer interrupt updated the clock mid-read.  usec may
 * exceed one second here; the tail normalizes it into tv_sec/tv_usec.
 */
void do_gettimeofday(struct timeval *tv)
{
	unsigned long seq;
	unsigned int sec, usec;

	do {
		seq = read_seqbegin(&xtime_lock);

		sec = xtime.tv_sec;
		usec = xtime.tv_nsec / NSEC_PER_USEC;

		/* i386 does some correction here to keep the clock
		   monotonic even when ntpd is fixing drift.
		   But they didn't work for me, there is a non monotonic
		   clock anyways with ntp.
		   I dropped all corrections now until a real solution can
		   be found. Note when you fix it here you need to do the same
		   in arch/x86_64/kernel/vsyscall.c and export all needed
		   variables in vmlinux.lds. -AK */
		usec += do_gettimeoffset();

	} while (read_seqretry(&xtime_lock, seq));

	/* normalize: usec may have overflowed past one second */
	tv->tv_sec = sec + usec / USEC_PER_SEC;
	tv->tv_usec = usec % USEC_PER_SEC;
}

EXPORT_SYMBOL(do_gettimeofday);
148
149/*
150 * settimeofday() first undoes the correction that gettimeofday would do
151 * on the time, and then saves it. This is ugly, but has been like this for
152 * ages already.
153 */
154
/*
 * Set the wall-clock time to *tv.  First subtracts the offset that
 * gettimeofday would add (so a subsequent read returns the value just
 * set), then adjusts wall_to_monotonic by the opposite delta so the
 * monotonic clock is unaffected.  Returns 0, or -EINVAL if tv_nsec is
 * out of range.  Caller-visible side effects: resets NTP state and
 * fires clock_was_set().
 */
int do_settimeofday(struct timespec *tv)
{
	time_t wtm_sec, sec = tv->tv_sec;
	long wtm_nsec, nsec = tv->tv_nsec;

	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
		return -EINVAL;

	write_seqlock_irq(&xtime_lock);

	/* undo the interpolation gettimeofday would apply on read */
	nsec -= do_gettimeoffset() * NSEC_PER_USEC;

	wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);

	set_normalized_timespec(&xtime, sec, nsec);
	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);

	ntp_clear();

	write_sequnlock_irq(&xtime_lock);
	clock_was_set();
	return 0;
}

EXPORT_SYMBOL(do_settimeofday);
181
/*
 * Return the PC to credit in profiling output.  If the interrupt hit
 * inside a lock function, try to report the caller instead, fished off
 * the stack.
 */
unsigned long profile_pc(struct pt_regs *regs)
{
	unsigned long pc = instruction_pointer(regs);

	/* Assume the lock function has either no stack frame or a copy
	   of eflags from PUSHF
	   Eflags always has bits 22 and up cleared unlike kernel addresses. */
	if (!user_mode(regs) && in_lock_functions(pc)) {
		unsigned long *sp = (unsigned long *)regs->rsp;
		/* a value with bits 22+ set looks like a kernel address,
		   not saved eflags — treat it as the return address */
		if (sp[0] >> 22)
			return sp[0];
		if (sp[1] >> 22)
			return sp[1];
	}
	return pc;
}
EXPORT_SYMBOL(profile_pc);
199
200/*
201 * In order to set the CMOS clock precisely, set_rtc_mmss has to be called 500
202 * ms after the second nowtime has started, because when nowtime is written
203 * into the registers of the CMOS clock, it will jump to the next second
204 * precisely 500 ms later. Check the Motorola MC146818A or Dallas DS12887 data
205 * sheet for details.
206 */
207
/*
 * Write the minutes/seconds of 'nowtime' (seconds since epoch) into the
 * CMOS RTC.  Only minutes and seconds are touched; see the in-body
 * comment about hour overflow.  The register write ORDER at the end is
 * hardware-critical — do not reorder.
 */
static void set_rtc_mmss(unsigned long nowtime)
{
	int real_seconds, real_minutes, cmos_minutes;
	unsigned char control, freq_select;

/*
 * IRQs are disabled when we're called from the timer interrupt,
 * no need for spin_lock_irqsave()
 */

	spin_lock(&rtc_lock);

/*
 * Tell the clock it's being set and stop it.
 */

	control = CMOS_READ(RTC_CONTROL);
	CMOS_WRITE(control | RTC_SET, RTC_CONTROL);

	freq_select = CMOS_READ(RTC_FREQ_SELECT);
	CMOS_WRITE(freq_select | RTC_DIV_RESET2, RTC_FREQ_SELECT);

	cmos_minutes = CMOS_READ(RTC_MINUTES);
	BCD_TO_BIN(cmos_minutes);

/*
 * since we're only adjusting minutes and seconds, don't interfere with hour
 * overflow. This avoids messing with unknown time zones but requires your RTC
 * not to be off by more than 15 minutes. Since we're calling it only when
 * our clock is externally synchronized using NTP, this shouldn't be a problem.
 */

	real_seconds = nowtime % 60;
	real_minutes = nowtime / 60;
	if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1)
		real_minutes += 30;	/* correct for half hour time zone */
	real_minutes %= 60;

	if (abs(real_minutes - cmos_minutes) >= 30) {
		/* RTC is too far off; refuse rather than corrupt hours */
		printk(KERN_WARNING "time.c: can't update CMOS clock "
		       "from %d to %d\n", cmos_minutes, real_minutes);
	} else {
		BIN_TO_BCD(real_seconds);
		BIN_TO_BCD(real_minutes);
		CMOS_WRITE(real_seconds, RTC_SECONDS);
		CMOS_WRITE(real_minutes, RTC_MINUTES);
	}

/*
 * The following flags have to be released exactly in this order, otherwise the
 * DS12887 (popular MC146818A clone with integrated battery and quartz) will
 * not reset the oscillator and will not update precisely 500 ms later. You
 * won't find this mentioned in the Dallas Semiconductor data sheets, but who
 * believes data sheets anyway ... -- Markus Kuhn
 */

	CMOS_WRITE(control, RTC_CONTROL);
	CMOS_WRITE(freq_select, RTC_FREQ_SELECT);

	spin_unlock(&rtc_lock);
}
269
270
271/* monotonic_clock(): returns # of nanoseconds passed since time_init()
272 * Note: This function is required to return accurate
273 * time even in the absence of multiple timer ticks.
274 */
Dimitri Sivanichcbf9b4b2006-09-26 10:52:34 +0200275static inline unsigned long long cycles_2_ns(unsigned long long cyc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276unsigned long long monotonic_clock(void)
277{
278 unsigned long seq;
279 u32 last_offset, this_offset, offset;
280 unsigned long long base;
281
282 if (vxtime.mode == VXTIME_HPET) {
283 do {
284 seq = read_seqbegin(&xtime_lock);
285
286 last_offset = vxtime.last;
287 base = monotonic_base;
john stultza3a00752005-06-23 00:08:36 -0700288 this_offset = hpet_readl(HPET_COUNTER);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 } while (read_seqretry(&xtime_lock, seq));
290 offset = (this_offset - last_offset);
Vojtech Pavlik42211332006-06-26 13:58:32 +0200291 offset *= NSEC_PER_TICK / hpet_tick;
Andi Kleen0b913172006-01-11 22:45:33 +0100292 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700293 do {
294 seq = read_seqbegin(&xtime_lock);
295
296 last_offset = vxtime.last_tsc;
297 base = monotonic_base;
298 } while (read_seqretry(&xtime_lock, seq));
Andi Kleenc818a182006-01-11 22:45:24 +0100299 this_offset = get_cycles_sync();
Dimitri Sivanichcbf9b4b2006-09-26 10:52:34 +0200300 offset = cycles_2_ns(this_offset - last_offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700301 }
Andi Kleen7351c0b2006-03-25 16:30:34 +0100302 return base + offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303}
304EXPORT_SYMBOL(monotonic_clock);
305
David Howells7d12e782006-10-05 14:55:46 +0100306static noinline void handle_lost_ticks(int lost)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307{
Andi Kleen7351c0b2006-03-25 16:30:34 +0100308 static long lost_count;
309 static int warned;
310 if (report_lost_ticks) {
311 printk(KERN_WARNING "time.c: Lost %d timer tick(s)! ", lost);
David Howells7d12e782006-10-05 14:55:46 +0100312 print_symbol("rip %s)\n", get_irq_regs()->rip);
Andi Kleen7351c0b2006-03-25 16:30:34 +0100313 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314
Andi Kleen7351c0b2006-03-25 16:30:34 +0100315 if (lost_count == 1000 && !warned) {
316 printk(KERN_WARNING "warning: many lost ticks.\n"
317 KERN_WARNING "Your time source seems to be instable or "
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318 "some driver is hogging interupts\n");
David Howells7d12e782006-10-05 14:55:46 +0100319 print_symbol("rip %s\n", get_irq_regs()->rip);
john stultz2d0c87c2007-02-16 01:28:18 -0800320 if (vxtime.mode == VXTIME_TSC && hpet_address) {
Andi Kleen7351c0b2006-03-25 16:30:34 +0100321 printk(KERN_WARNING "Falling back to HPET\n");
322 if (hpet_use_timer)
323 vxtime.last = hpet_readl(HPET_T0_CMP) -
324 hpet_tick;
325 else
326 vxtime.last = hpet_readl(HPET_COUNTER);
327 vxtime.mode = VXTIME_HPET;
john stultz2d0c87c2007-02-16 01:28:18 -0800328 vxtime.hpet_address = hpet_address;
Andi Kleen7351c0b2006-03-25 16:30:34 +0100329 do_gettimeoffset = do_gettimeoffset_hpet;
330 }
331 /* else should fall back to PIT, but code missing. */
332 warned = 1;
333 } else
334 lost_count++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335
336#ifdef CONFIG_CPU_FREQ
Andi Kleen7351c0b2006-03-25 16:30:34 +0100337 /* In some cases the CPU can change frequency without us noticing
338 Give cpufreq a change to catch up. */
339 if ((lost_count+1) % 25 == 0)
340 cpufreq_delayed_get();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341#endif
342}
343
David Howells7d12e782006-10-05 14:55:46 +0100344void main_timer_handler(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345{
346 static unsigned long rtc_update = 0;
347 unsigned long tsc;
Andi Kleen9ede6b02006-03-25 16:29:31 +0100348 int delay = 0, offset = 0, lost = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349
350/*
351 * Here we are in the timer irq handler. We have irqs locally disabled (so we
352 * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running
353 * on the other CPU, so we need a lock. We also need to lock the vsyscall
354 * variables, because both do_timer() and us change them -arca+vojtech
355 */
356
357 write_seqlock(&xtime_lock);
358
john stultz2d0c87c2007-02-16 01:28:18 -0800359 if (hpet_address)
john stultza3a00752005-06-23 00:08:36 -0700360 offset = hpet_readl(HPET_COUNTER);
361
362 if (hpet_use_timer) {
363 /* if we're using the hpet timer functionality,
364 * we can more accurately know the counter value
365 * when the timer interrupt occured.
366 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367 offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
368 delay = hpet_readl(HPET_COUNTER) - offset;
Andi Kleen9ede6b02006-03-25 16:29:31 +0100369 } else if (!pmtmr_ioport) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 spin_lock(&i8253_lock);
371 outb_p(0x00, 0x43);
372 delay = inb_p(0x40);
373 delay |= inb(0x40) << 8;
374 spin_unlock(&i8253_lock);
375 delay = LATCH - 1 - delay;
376 }
377
Andi Kleenc818a182006-01-11 22:45:24 +0100378 tsc = get_cycles_sync();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379
380 if (vxtime.mode == VXTIME_HPET) {
381 if (offset - vxtime.last > hpet_tick) {
382 lost = (offset - vxtime.last) / hpet_tick - 1;
383 }
384
385 monotonic_base +=
Vojtech Pavlik42211332006-06-26 13:58:32 +0200386 (offset - vxtime.last) * NSEC_PER_TICK / hpet_tick;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387
388 vxtime.last = offset;
Andi Kleen312df5f2005-05-16 21:53:28 -0700389#ifdef CONFIG_X86_PM_TIMER
390 } else if (vxtime.mode == VXTIME_PMTMR) {
391 lost = pmtimer_mark_offset();
392#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393 } else {
394 offset = (((tsc - vxtime.last_tsc) *
Vojtech Pavlik42211332006-06-26 13:58:32 +0200395 vxtime.tsc_quot) >> US_SCALE) - USEC_PER_TICK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396
397 if (offset < 0)
398 offset = 0;
399
Vojtech Pavlik42211332006-06-26 13:58:32 +0200400 if (offset > USEC_PER_TICK) {
401 lost = offset / USEC_PER_TICK;
402 offset %= USEC_PER_TICK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403 }
404
Dimitri Sivanichcbf9b4b2006-09-26 10:52:34 +0200405 monotonic_base += cycles_2_ns(tsc - vxtime.last_tsc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406
407 vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
408
409 if ((((tsc - vxtime.last_tsc) *
Vojtech Pavlik42211332006-06-26 13:58:32 +0200410 vxtime.tsc_quot) >> US_SCALE) < offset)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700411 vxtime.last_tsc = tsc -
Vojtech Pavlik42211332006-06-26 13:58:32 +0200412 (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413 }
414
Atsushi Nemoto3171a032006-09-29 02:00:32 -0700415 if (lost > 0)
David Howells7d12e782006-10-05 14:55:46 +0100416 handle_lost_ticks(lost);
Atsushi Nemoto3171a032006-09-29 02:00:32 -0700417 else
418 lost = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419
420/*
421 * Do the timer stuff.
422 */
423
Atsushi Nemoto3171a032006-09-29 02:00:32 -0700424 do_timer(lost + 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425#ifndef CONFIG_SMP
David Howells7d12e782006-10-05 14:55:46 +0100426 update_process_times(user_mode(get_irq_regs()));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427#endif
428
429/*
430 * In the SMP case we use the local APIC timer interrupt to do the profiling,
431 * except when we simulate SMP mode on a uniprocessor system, in that case we
432 * have to call the local interrupt handler.
433 */
434
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 if (!using_apic_timer)
David Howells7d12e782006-10-05 14:55:46 +0100436 smp_local_timer_interrupt();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437
438/*
439 * If we have an externally synchronized Linux clock, then update CMOS clock
440 * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy
441 * closest to exactly 500 ms before the next second. If the update fails, we
442 * don't care, as it'll be updated on the next turn, and the problem (time way
443 * off) isn't likely to go away much sooner anyway.
444 */
445
john stultzb149ee22005-09-06 15:17:46 -0700446 if (ntp_synced() && xtime.tv_sec > rtc_update &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447 abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
448 set_rtc_mmss(xtime.tv_sec);
449 rtc_update = xtime.tv_sec + 660;
450 }
451
452 write_sequnlock(&xtime_lock);
Andi Kleen73dea472006-02-03 21:50:50 +0100453}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454
David Howells7d12e782006-10-05 14:55:46 +0100455static irqreturn_t timer_interrupt(int irq, void *dev_id)
Andi Kleen73dea472006-02-03 21:50:50 +0100456{
457 if (apic_runs_main_timer > 1)
458 return IRQ_HANDLED;
David Howells7d12e782006-10-05 14:55:46 +0100459 main_timer_handler();
Venkatesh Pallipadid25bf7e2006-01-11 22:44:24 +0100460 if (using_apic_timer)
461 smp_send_timer_broadcast_ipi();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462 return IRQ_HANDLED;
463}
464
Ravikiran G Thirumalai68ed0042006-03-22 00:07:38 -0800465static unsigned int cyc2ns_scale __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466
Mathieu Desnoyersdacb16b2005-10-30 14:59:25 -0800467static inline void set_cyc2ns_scale(unsigned long cpu_khz)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468{
Vojtech Pavlik42211332006-06-26 13:58:32 +0200469 cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470}
471
472static inline unsigned long long cycles_2_ns(unsigned long long cyc)
473{
Vojtech Pavlik42211332006-06-26 13:58:32 +0200474 return (cyc * cyc2ns_scale) >> NS_SCALE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475}
476
/*
 * Scheduler clock: nanoseconds from a raw TSC read.  Fast but only
 * approximately synchronized between CPUs — good enough for scheduling.
 */
unsigned long long sched_clock(void)
{
	unsigned long a = 0;

#if 0
	/* Don't do a HPET read here. Using TSC always is much faster
	   and HPET may not be mapped yet when the scheduler first runs.
	   Disadvantage is a small drift between CPUs in some configurations,
	   but that should be tolerable. */
	if (__vxtime.mode == VXTIME_HPET)
		return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> US_SCALE;
#endif

	/* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
	   which means it is not completely exact and may not be monotonous between
	   CPUs. But the errors should be too small to matter for scheduling
	   purposes. */

	rdtscll(a);
	return cycles_2_ns(a);
}
498
Andi Kleenbdf2b1c2006-01-11 22:46:39 +0100499static unsigned long get_cmos_time(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500{
Matt Mackall641f71f2006-03-28 01:56:01 -0800501 unsigned int year, mon, day, hour, min, sec;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502 unsigned long flags;
Alexey Starikovskiyad718602007-02-02 19:48:19 +0300503 unsigned century = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505 spin_lock_irqsave(&rtc_lock, flags);
506
Matt Mackall641f71f2006-03-28 01:56:01 -0800507 do {
508 sec = CMOS_READ(RTC_SECONDS);
509 min = CMOS_READ(RTC_MINUTES);
510 hour = CMOS_READ(RTC_HOURS);
511 day = CMOS_READ(RTC_DAY_OF_MONTH);
512 mon = CMOS_READ(RTC_MONTH);
513 year = CMOS_READ(RTC_YEAR);
Andi Kleen6954bee2006-03-25 16:30:31 +0100514#ifdef CONFIG_ACPI
Alexey Starikovskiyad718602007-02-02 19:48:19 +0300515 if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
516 acpi_gbl_FADT.century)
517 century = CMOS_READ(acpi_gbl_FADT.century);
Andi Kleen6954bee2006-03-25 16:30:31 +0100518#endif
Matt Mackall641f71f2006-03-28 01:56:01 -0800519 } while (sec != CMOS_READ(RTC_SECONDS));
Andi Kleen6954bee2006-03-25 16:30:31 +0100520
Linus Torvalds1da177e2005-04-16 15:20:36 -0700521 spin_unlock_irqrestore(&rtc_lock, flags);
522
Andi Kleen0b913172006-01-11 22:45:33 +0100523 /*
524 * We know that x86-64 always uses BCD format, no need to check the
525 * config register.
Andi Kleen7351c0b2006-03-25 16:30:34 +0100526 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527
Andi Kleen0b913172006-01-11 22:45:33 +0100528 BCD_TO_BIN(sec);
529 BCD_TO_BIN(min);
530 BCD_TO_BIN(hour);
531 BCD_TO_BIN(day);
532 BCD_TO_BIN(mon);
533 BCD_TO_BIN(year);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534
Alexey Starikovskiyad718602007-02-02 19:48:19 +0300535 if (century) {
536 BCD_TO_BIN(century);
537 year += century * 100;
538 printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
Andi Kleen6954bee2006-03-25 16:30:31 +0100539 } else {
540 /*
541 * x86-64 systems only exists since 2002.
542 * This will work up to Dec 31, 2100
543 */
544 year += 2000;
545 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546
547 return mktime(year, mon, day, hour, min, sec);
548}
549
550#ifdef CONFIG_CPU_FREQ
551
552/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
553 changes.
554
555 RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
556 not that important because current Opteron setups do not support
557 scaling on SMP anyroads.
558
559 Should fix up last_tsc too. Currently gettimeofday in the
560 first tick after the change will be slightly wrong. */
561
562#include <linux/workqueue.h>
563
564static unsigned int cpufreq_delayed_issched = 0;
565static unsigned int cpufreq_init = 0;
566static struct work_struct cpufreq_delayed_get_work;
567
David Howells65f27f32006-11-22 14:55:48 +0000568static void handle_cpufreq_delayed_get(struct work_struct *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569{
570 unsigned int cpu;
571 for_each_online_cpu(cpu) {
572 cpufreq_get(cpu);
573 }
574 cpufreq_delayed_issched = 0;
575}
576
577/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
578 * to verify the CPU frequency the timing core thinks the CPU is running
579 * at is still correct.
580 */
/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
 * to verify the CPU frequency the timing core thinks the CPU is running
 * at is still correct.  Only one query is in flight at a time
 * (cpufreq_delayed_issched), and the debug message prints once.
 */
static void cpufreq_delayed_get(void)
{
	static int warned;
	if (cpufreq_init && !cpufreq_delayed_issched) {
		cpufreq_delayed_issched = 1;
		if (!warned) {
			warned = 1;
			printk(KERN_DEBUG
		"Losing some ticks... checking if CPU frequency changed.\n");
		}
		schedule_work(&cpufreq_delayed_get_work);
	}
}
594
/* reference values captured at the first frequency transition; all
 * later scaling is done relative to these */
static unsigned int ref_freq = 0;
static unsigned long loops_per_jiffy_ref = 0;

static unsigned long cpu_khz_ref = 0;

/*
 * cpufreq transition callback: rescale loops_per_jiffy, cpu_khz and the
 * TSC->usec quotient when the CPU frequency changes.  A no-op on CPUs
 * with a constant TSC.
 */
static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
				 void *data)
{
	struct cpufreq_freqs *freq = data;
	unsigned long *lpj, dummy;

	if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
		return 0;

	/* lpj points at dummy unless loops_per_jiffy actually scales */
	lpj = &dummy;
	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
#ifdef CONFIG_SMP
		lpj = &cpu_data[freq->cpu].loops_per_jiffy;
#else
		lpj = &boot_cpu_data.loops_per_jiffy;
#endif

	if (!ref_freq) {
		ref_freq = freq->old;
		loops_per_jiffy_ref = *lpj;
		cpu_khz_ref = cpu_khz;
	}
	if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
	    (val == CPUFREQ_RESUMECHANGE)) {
		*lpj =
		cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);

		cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
			vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
	}

	set_cyc2ns_scale(cpu_khz_ref);

	return 0;
}
637
static struct notifier_block time_cpufreq_notifier_block = {
	.notifier_call = time_cpufreq_notifier
};

/*
 * Register the cpufreq transition notifier and the delayed-query work
 * item; cpufreq_init gates cpufreq_delayed_get() until this succeeds.
 */
static int __init cpufreq_tsc(void)
{
	INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
	if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
				       CPUFREQ_TRANSITION_NOTIFIER))
		cpufreq_init = 1;
	return 0;
}

core_initcall(cpufreq_tsc);
652
653#endif
654
655/*
656 * calibrate_tsc() calibrates the processor TSC in a very simple way, comparing
657 * it to the HPET timer of known frequency.
658 */
659
#define TICK_COUNT 100000000
#define TICK_MIN 5000
#define MAX_READ_RETRIES 5

/*
 * Some platforms take periodic SMI interrupts with 5ms duration. Make sure none
 * occurs between the reads of the hpet & TSC.
 *
 * The HPET read is bracketed by two TSC reads; if they are more than
 * TICK_MIN cycles apart, something (an SMI) intervened and the sample
 * is retried, up to MAX_READ_RETRIES times.  The warning prints once.
 */
static void __init read_hpet_tsc(int *hpet, int *tsc)
{
	int tsc1, tsc2, hpet1, retries = 0;
	static int msg;

	do {
		tsc1 = get_cycles_sync();
		hpet1 = hpet_readl(HPET_COUNTER);
		tsc2 = get_cycles_sync();
	} while (tsc2 - tsc1 > TICK_MIN && retries++ < MAX_READ_RETRIES);
	if (retries >= MAX_READ_RETRIES && !msg++)
		printk(KERN_WARNING
		       "hpet.c: exceeded max retries to read HPET & TSC\n");
	*hpet = hpet1;
	*tsc = tsc2;
}
684
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685
/*
 * Calibrate the TSC against the HPET, whose period (hpet_period, in
 * femtoseconds) is known.  Spins until either counter has advanced by
 * TICK_COUNT, then returns the TSC rate in kHz.  IRQs are briefly
 * re-enabled between samples (local_irq_restore) so interrupts are not
 * starved during the measurement window.
 */
static unsigned int __init hpet_calibrate_tsc(void)
{
	int tsc_start, hpet_start;
	int tsc_now, hpet_now;
	unsigned long flags;

	local_irq_save(flags);
	local_irq_disable();

	read_hpet_tsc(&hpet_start, &tsc_start);

	do {
		local_irq_disable();
		read_hpet_tsc(&hpet_now, &tsc_now);
		local_irq_restore(flags);
	} while ((tsc_now - tsc_start) < TICK_COUNT &&
		 (hpet_now - hpet_start) < TICK_COUNT);

	return (tsc_now - tsc_start) * 1000000000L
		/ ((hpet_now - hpet_start) * hpet_period / 1000);
}
707
708
709/*
710 * pit_calibrate_tsc() uses the speaker output (channel 2) of
711 * the PIT. This is better than using the timer interrupt output,
712 * because we can read the value of the speaker with just one inb(),
713 * where we need three i/o operations for the interrupt channel.
714 * We count how many ticks the TSC does in 50 ms.
715 */
716
/*
 * pit_calibrate_tsc() uses the speaker output (channel 2) of
 * the PIT. This is better than using the timer interrupt output,
 * because we can read the value of the speaker with just one inb(),
 * where we need three i/o operations for the interrupt channel.
 * We count how many ticks the TSC does in 50 ms.
 * Returns the TSC rate in kHz (cycles per 50 ms divided by 50).
 */
static unsigned int __init pit_calibrate_tsc(void)
{
	unsigned long start, end;
	unsigned long flags;

	spin_lock_irqsave(&i8253_lock, flags);

	/* gate channel 2 on, speaker off */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/* channel 2, mode 0 (one-shot), load 50 ms count */
	outb(0xb0, 0x43);
	outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
	outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42);
	start = get_cycles_sync();
	while ((inb(0x61) & 0x20) == 0);	/* wait for OUT pin to go high */
	end = get_cycles_sync();

	spin_unlock_irqrestore(&i8253_lock, flags);

	return (end - start) / 50;
}
737
#ifdef CONFIG_HPET
/*
 * Register the HPET block with the /dev/hpet driver.  Timers 0 and 1
 * are reserved for platform use (tick and optionally RTC emulation);
 * IRQ routing for any further comparators is read from their config
 * registers.  No-op when no HPET was discovered.
 */
static __init int late_hpet_init(void)
{
	struct hpet_data hd;
	unsigned int ntimer;

	if (!hpet_address)
		return 0;

	memset(&hd, 0, sizeof (hd));

	/* number of comparators = HPET_ID field + 1 */
	ntimer = hpet_readl(HPET_ID);
	ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
	ntimer++;

	/*
	 * Register with driver.
	 * Timer0 and Timer1 is used by platform.
	 */
	hd.hd_phys_address = hpet_address;
	hd.hd_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
	hd.hd_nirqs = ntimer;
	hd.hd_flags = HPET_DATA_PLATFORM;
	hpet_reserve_timer(&hd, 0);
#ifdef CONFIG_HPET_EMULATE_RTC
	hpet_reserve_timer(&hd, 1);
#endif
	hd.hd_irq[0] = HPET_LEGACY_8254;
	hd.hd_irq[1] = HPET_LEGACY_RTC;
	if (ntimer > 2) {
		struct hpet *hpet;
		struct hpet_timer *timer;
		int i;

		hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE);
		timer = &hpet->hpet_timers[2];
		for (i = 2; i < ntimer; timer++, i++)
			hd.hd_irq[i] = (timer->hpet_config &
					Tn_INT_ROUTE_CNF_MASK) >>
				Tn_INT_ROUTE_CNF_SHIFT;

	}

	hpet_alloc(&hd);
	return 0;
}
fs_initcall(late_hpet_init);
#endif
786
/*
 * (Re)program the HPET: stop all timers, zero the 64-bit main counter,
 * optionally set up timer 0 as the periodic legacy tick, then start the
 * counter again.  Always returns 0.
 *
 * NOTE(review): the 'tick' argument is never used -- the programmed
 * period is always the global hpet_tick, even when stop_timer_interrupt()
 * passes 0.  Confirm whether that caller really intends the counter to
 * be re-enabled with the old period.
 */
static int hpet_timer_stop_set_go(unsigned long tick)
{
	unsigned int cfg;

/*
 * Stop the timers and reset the main counter.
 */

	cfg = hpet_readl(HPET_CFG);
	cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
	hpet_writel(cfg, HPET_CFG);
	/* The main counter is 64 bits wide; clear both 32-bit halves. */
	hpet_writel(0, HPET_COUNTER);
	hpet_writel(0, HPET_COUNTER + 4);

/*
 * Set up timer 0, as periodic with first interrupt to happen at hpet_tick,
 * and period also hpet_tick.
 */
	if (hpet_use_timer) {
		hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
		    HPET_TN_32BIT, HPET_T0_CFG);
		/* With HPET_TN_SETVAL the first comparator write sets the
		 * first interrupt time and the second one the period, so
		 * the double write below is intentional. */
		hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */
		hpet_writel(hpet_tick, HPET_T0_CMP); /* period */
		cfg |= HPET_CFG_LEGACY;
	}
/*
 * Go!
 */

	cfg |= HPET_CFG_ENABLE;
	hpet_writel(cfg, HPET_CFG);

	return 0;
}
821
/*
 * Map and validate the HPET found by ACPI, derive hpet_tick (main
 * counter increments per kernel tick) and start it via
 * hpet_timer_stop_set_go().  Returns 0 on success, -1 when no HPET is
 * present or its registers look bogus.
 */
static int hpet_init(void)
{
	unsigned int id;

	if (!hpet_address)
		return -1;
	/* Map once for kernel use and once for the vsyscall page. */
	set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
	__set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);

/*
 * Read the period, compute tick and quotient.
 */

	id = hpet_readl(HPET_ID);

	/* A vendor ID of zero or no timers reported means broken hardware. */
	if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER))
		return -1;

	/* Period is in femtoseconds; reject values outside a sane range
	 * (the HPET spec caps the period at 100ns = 10^8 fs). */
	hpet_period = hpet_readl(HPET_PERIOD);
	if (hpet_period < 100000 || hpet_period > 100000000)
		return -1;

	/* Round to nearest: counter ticks per kernel tick. */
	hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period;

	/* Legacy replacement capable: timer 0 can drive the system tick. */
	hpet_use_timer = (id & HPET_ID_LEGSUP);

	return hpet_timer_stop_set_go(hpet_tick);
}
850
/* Restart the HPET after resume with the tick computed at boot. */
static int hpet_reenable(void)
{
	return hpet_timer_stop_set_go(hpet_tick);
}
855
Andi Kleen73dea472006-02-03 21:50:50 +0100856#define PIT_MODE 0x43
857#define PIT_CH0 0x40
858
859static void __init __pit_init(int val, u8 mode)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860{
861 unsigned long flags;
862
863 spin_lock_irqsave(&i8253_lock, flags);
Andi Kleen73dea472006-02-03 21:50:50 +0100864 outb_p(mode, PIT_MODE);
865 outb_p(val & 0xff, PIT_CH0); /* LSB */
866 outb_p(val >> 8, PIT_CH0); /* MSB */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867 spin_unlock_irqrestore(&i8253_lock, flags);
868}
869
Andi Kleen73dea472006-02-03 21:50:50 +0100870void __init pit_init(void)
871{
872 __pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */
873}
874
/* Stop the periodic PIT tick by switching channel 0 to mode 0 (one-shot). */
void __init pit_stop_interrupt(void)
{
	__pit_init(0, 0x30); /* mode 0 */
}
879
880void __init stop_timer_interrupt(void)
881{
882 char *name;
john stultz2d0c87c2007-02-16 01:28:18 -0800883 if (hpet_address) {
Andi Kleen73dea472006-02-03 21:50:50 +0100884 name = "HPET";
885 hpet_timer_stop_set_go(0);
886 } else {
887 name = "PIT";
888 pit_stop_interrupt();
889 }
890 printk(KERN_INFO "timer: %s interrupt stopped.\n", name);
891}
892
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893int __init time_setup(char *str)
894{
895 report_lost_ticks = 1;
896 return 1;
897}
898
899static struct irqaction irq0 = {
Thomas Gleixnerb1e05aa2006-07-01 19:29:29 -0700900 timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL
Linus Torvalds1da177e2005-04-16 15:20:36 -0700901};
902
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903void __init time_init(void)
904{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 if (nohpet)
john stultz2d0c87c2007-02-16 01:28:18 -0800906 hpet_address = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907 xtime.tv_sec = get_cmos_time();
908 xtime.tv_nsec = 0;
909
910 set_normalized_timespec(&wall_to_monotonic,
911 -xtime.tv_sec, -xtime.tv_nsec);
912
john stultza3a00752005-06-23 00:08:36 -0700913 if (!hpet_init())
Vojtech Pavlik42211332006-06-26 13:58:32 +0200914 vxtime_hz = (FSEC_PER_SEC + hpet_period / 2) / hpet_period;
Andi Kleen68e18892005-12-12 22:17:07 -0800915 else
john stultz2d0c87c2007-02-16 01:28:18 -0800916 hpet_address = 0;
john stultza3a00752005-06-23 00:08:36 -0700917
918 if (hpet_use_timer) {
Jordan Hargraveb20367a2006-04-07 19:50:18 +0200919 /* set tick_nsec to use the proper rate for HPET */
920 tick_nsec = TICK_NSEC_HPET;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921 cpu_khz = hpet_calibrate_tsc();
922 timename = "HPET";
Andi Kleen312df5f2005-05-16 21:53:28 -0700923#ifdef CONFIG_X86_PM_TIMER
john stultz2d0c87c2007-02-16 01:28:18 -0800924 } else if (pmtmr_ioport && !hpet_address) {
Andi Kleen312df5f2005-05-16 21:53:28 -0700925 vxtime_hz = PM_TIMER_FREQUENCY;
926 timename = "PM";
927 pit_init();
928 cpu_khz = pit_calibrate_tsc();
929#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930 } else {
931 pit_init();
932 cpu_khz = pit_calibrate_tsc();
933 timename = "PIT";
934 }
935
Andi Kleene8b91772006-02-26 04:18:49 +0100936 vxtime.mode = VXTIME_TSC;
Vojtech Pavlik42211332006-06-26 13:58:32 +0200937 vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
938 vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
Andi Kleenc818a182006-01-11 22:45:24 +0100939 vxtime.last_tsc = get_cycles_sync();
Mathieu Desnoyersdacb16b2005-10-30 14:59:25 -0800940 set_cyc2ns_scale(cpu_khz);
Dimitri Sivanichcbf9b4b2006-09-26 10:52:34 +0200941 setup_irq(0, &irq0);
Vojtech Pavlika670fad2006-09-26 10:52:28 +0200942
943#ifndef CONFIG_SMP
944 time_init_gtod();
945#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946}
947
Ingo Molnar95492e42007-02-16 01:27:34 -0800948static int tsc_unstable = 0;
949
950void mark_tsc_unstable(void)
951{
952 tsc_unstable = 1;
953}
954EXPORT_SYMBOL_GPL(mark_tsc_unstable);
955
Andi Kleena8ab26f2005-04-16 15:25:19 -0700956/*
Andi Kleen312df5f2005-05-16 21:53:28 -0700957 * Make an educated guess if the TSC is trustworthy and synchronized
958 * over all CPUs.
959 */
Shaohua Li396bd502006-02-03 21:51:20 +0100960__cpuinit int unsynchronized_tsc(void)
Andi Kleen312df5f2005-05-16 21:53:28 -0700961{
Ingo Molnar95492e42007-02-16 01:27:34 -0800962 if (tsc_unstable)
963 return 1;
964
Andi Kleen312df5f2005-05-16 21:53:28 -0700965#ifdef CONFIG_SMP
Vojtech Pavlikf8bf3c62006-06-26 13:58:23 +0200966 if (apic_is_clustered_box())
Andi Kleen312df5f2005-05-16 21:53:28 -0700967 return 1;
Andi Kleen312df5f2005-05-16 21:53:28 -0700968#endif
Andi Kleen0e5f61b2006-07-29 21:42:37 +0200969 /* Most intel systems have synchronized TSCs except for
970 multi node systems */
971 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
972#ifdef CONFIG_ACPI
973 /* But TSC doesn't tick in C3 so don't use it there */
Alexey Starikovskiyad718602007-02-02 19:48:19 +0300974 if (acpi_gbl_FADT.header.length > 0 && acpi_gbl_FADT.C3latency < 1000)
Andi Kleen0e5f61b2006-07-29 21:42:37 +0200975 return 1;
976#endif
977 return 0;
978 }
979
Andi Kleen312df5f2005-05-16 21:53:28 -0700980 /* Assume multi socket systems are not synchronized */
Andi Kleen737c5c32006-01-11 22:45:15 +0100981 return num_present_cpus() > 1;
Andi Kleen312df5f2005-05-16 21:53:28 -0700982}
983
984/*
Andi Kleene8b91772006-02-26 04:18:49 +0100985 * Decide what mode gettimeofday should use.
Andi Kleena8ab26f2005-04-16 15:25:19 -0700986 */
Vojtech Pavlika670fad2006-09-26 10:52:28 +0200987void time_init_gtod(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700988{
989 char *timetype;
990
Andi Kleen312df5f2005-05-16 21:53:28 -0700991 if (unsynchronized_tsc())
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992 notsc = 1;
Vojtech Pavlika670fad2006-09-26 10:52:28 +0200993
john stultz2d0c87c2007-02-16 01:28:18 -0800994 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
Vojtech Pavlikc08c8202006-09-26 10:52:28 +0200995 vgetcpu_mode = VGETCPU_RDTSCP;
996 else
997 vgetcpu_mode = VGETCPU_LSL;
998
john stultz2d0c87c2007-02-16 01:28:18 -0800999 if (hpet_address && notsc) {
john stultza3a00752005-06-23 00:08:36 -07001000 timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
Chris McDermott33042a92006-02-11 17:55:50 -08001001 if (hpet_use_timer)
1002 vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
1003 else
1004 vxtime.last = hpet_readl(HPET_COUNTER);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005 vxtime.mode = VXTIME_HPET;
john stultz2d0c87c2007-02-16 01:28:18 -08001006 vxtime.hpet_address = hpet_address;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007 do_gettimeoffset = do_gettimeoffset_hpet;
Andi Kleen312df5f2005-05-16 21:53:28 -07001008#ifdef CONFIG_X86_PM_TIMER
1009 /* Using PM for gettimeofday is quite slow, but we have no other
1010 choice because the TSC is too unreliable on some systems. */
john stultz2d0c87c2007-02-16 01:28:18 -08001011 } else if (pmtmr_ioport && !hpet_address && notsc) {
Andi Kleen312df5f2005-05-16 21:53:28 -07001012 timetype = "PM";
1013 do_gettimeoffset = do_gettimeoffset_pm;
1014 vxtime.mode = VXTIME_PMTMR;
1015 sysctl_vsyscall = 0;
1016 printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n");
1017#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018 } else {
john stultza3a00752005-06-23 00:08:36 -07001019 timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC";
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020 vxtime.mode = VXTIME_TSC;
1021 }
Vojtech Pavlika670fad2006-09-26 10:52:28 +02001022
1023 printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n",
1024 vxtime_hz / 1000000, vxtime_hz % 1000000, timename, timetype);
1025 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
1026 cpu_khz / 1000, cpu_khz % 1000);
1027 vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
1028 vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
1029 vxtime.last_tsc = get_cycles_sync();
1030
1031 set_cyc2ns_scale(cpu_khz);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032}
1033
1034__setup("report_lost_ticks", time_setup);
1035
static long clock_cmos_diff;	/* system time minus CMOS time at suspend */
static unsigned long sleep_start;	/* CMOS time when we suspended */
1038
Andi Kleen0b913172006-01-11 22:45:33 +01001039/*
1040 * sysfs support for the timer.
1041 */
1042
Pavel Machek0b9c33a2005-04-16 15:25:31 -07001043static int timer_suspend(struct sys_device *dev, pm_message_t state)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044{
1045 /*
1046 * Estimate time zone so that set_time can update the clock
1047 */
1048 long cmos_time = get_cmos_time();
1049
1050 clock_cmos_diff = -cmos_time;
1051 clock_cmos_diff += get_seconds();
1052 sleep_start = cmos_time;
1053 return 0;
1054}
1055
/*
 * Resume hook: recompute wall time from the CMOS clock plus the offset
 * saved by timer_suspend(), restart the tick hardware (HPET or i8254)
 * and resynchronize the gettimeofday state under xtime_lock.
 */
static int timer_resume(struct sys_device *dev)
{
	unsigned long flags;
	unsigned long sec;
	unsigned long ctime = get_cmos_time();
	long sleep_length = (ctime - sleep_start) * HZ;

	if (sleep_length < 0) {
		printk(KERN_WARNING "Time skew detected in timer resume!\n");
		/* The time after the resume must not be earlier than the time
		 * before the suspend or some nasty things will happen
		 */
		sleep_length = 0;
		ctime = sleep_start;
	}
	if (hpet_address)
		hpet_reenable();
	else
		i8254_timer_resume();

	sec = ctime + clock_cmos_diff;
	write_seqlock_irqsave(&xtime_lock,flags);
	xtime.tv_sec = sec;
	xtime.tv_nsec = 0;
	/* Re-seed the per-mode "last" timestamp so the first offset
	 * computed after resume starts from the current hardware state. */
	if (vxtime.mode == VXTIME_HPET) {
		if (hpet_use_timer)
			vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
		else
			vxtime.last = hpet_readl(HPET_COUNTER);
#ifdef CONFIG_X86_PM_TIMER
	} else if (vxtime.mode == VXTIME_PMTMR) {
		pmtimer_resume();
#endif
	} else
		vxtime.last_tsc = get_cycles_sync();
	write_sequnlock_irqrestore(&xtime_lock,flags);
	/* Credit the time spent asleep to jiffies and the monotonic base. */
	jiffies += sleep_length;
	monotonic_base += sleep_length * (NSEC_PER_SEC/HZ);
	touch_softlockup_watchdog();
	return 0;
}
1097
/* sysdev class hooking the timer into the suspend/resume path. */
static struct sysdev_class timer_sysclass = {
	.resume = timer_resume,
	.suspend = timer_suspend,
	set_kset_name("timer"),
};
1103
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104/* XXX this driverfs stuff should probably go elsewhere later -john */
1105static struct sys_device device_timer = {
1106 .id = 0,
1107 .cls = &timer_sysclass,
1108};
1109
1110static int time_init_device(void)
1111{
1112 int error = sysdev_class_register(&timer_sysclass);
1113 if (!error)
1114 error = sysdev_register(&device_timer);
1115 return error;
1116}
1117
1118device_initcall(time_init_device);
1119
1120#ifdef CONFIG_HPET_EMULATE_RTC
1121/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET
1122 * is enabled, we support RTC interrupt functionality in software.
1123 * RTC has 3 kinds of interrupts:
1124 * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
1125 * is updated
1126 * 2) Alarm Interrupt - generate an interrupt at a specific time of day
1127 * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
1128 * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
1129 * (1) and (2) above are implemented using polling at a frequency of
1130 * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
1131 * overhead. (DEFAULT_RTC_INT_FREQ)
1132 * For (3), we use interrupts at 64Hz or user specified periodic
1133 * frequency, whichever is higher.
1134 */
1135#include <linux/rtc.h>
1136
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137#define DEFAULT_RTC_INT_FREQ 64
1138#define RTC_NUM_INTS 1
1139
1140static unsigned long UIE_on;
1141static unsigned long prev_update_sec;
1142
1143static unsigned long AIE_on;
1144static struct rtc_time alarm_time;
1145
1146static unsigned long PIE_on;
1147static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ;
1148static unsigned long PIE_count;
1149
1150static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */
Clemens Ladisch7811fb82005-10-30 15:03:36 -08001151static unsigned int hpet_t1_cmp; /* cached comparator register */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001152
1153int is_hpet_enabled(void)
1154{
john stultz2d0c87c2007-02-16 01:28:18 -08001155 return hpet_address != 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156}
1157
1158/*
1159 * Timer 1 for RTC, we do not use periodic interrupt feature,
1160 * even if HPET supports periodic interrupts on Timer 1.
1161 * The reason being, to set up a periodic interrupt in HPET, we need to
1162 * stop the main counter. And if we do that everytime someone diables/enables
1163 * RTC, we will have adverse effect on main kernel timer running on Timer 0.
1164 * So, for the time being, simulate the periodic interrupt in software.
1165 *
1166 * hpet_rtc_timer_init() is called for the first time and during subsequent
1167 * interuppts reinit happens through hpet_rtc_timer_reinit().
1168 */
/*
 * Arm HPET timer 1 (one-shot, 32-bit) for RTC interrupt emulation.
 * The emulated interrupt rate is PIE_freq when the periodic interrupt
 * is enabled faster than the 64Hz default, otherwise 64Hz.
 * Returns 1 when the timer was armed, 0 when no HPET is in use.
 */
int hpet_rtc_timer_init(void)
{
	unsigned int cfg, cnt;
	unsigned long flags;

	if (!is_hpet_enabled())
		return 0;
	/*
	 * Set the counter 1 and enable the interrupts.
	 */
	if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
		hpet_rtc_int_freq = PIE_freq;
	else
		hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;

	local_irq_save(flags);

	/* hpet_tick*HZ = counter ticks per second; divide by the interrupt
	 * frequency to get the comparator delta per emulated interrupt. */
	cnt = hpet_readl(HPET_COUNTER);
	cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
	hpet_writel(cnt, HPET_T1_CMP);
	hpet_t1_cmp = cnt;

	/* One-shot mode: clear PERIODIC, force 32-bit, enable. */
	cfg = hpet_readl(HPET_T1_CFG);
	cfg &= ~HPET_TN_PERIODIC;
	cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
	hpet_writel(cfg, HPET_T1_CFG);

	local_irq_restore(flags);

	return 1;
}
1200
/*
 * Re-arm timer 1 from interrupt context for the next emulated RTC tick,
 * or disable it when no emulated interrupt source remains enabled.
 * Advances from the cached comparator (hpet_t1_cmp) rather than the
 * live counter to avoid drift, and catches up when the handler ran so
 * late that the new comparator value is already in the past.
 */
static void hpet_rtc_timer_reinit(void)
{
	unsigned int cfg, cnt, ticks_per_int, lost_ints;

	/* All sources off: stop timer 1 instead of re-arming it. */
	if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
		cfg = hpet_readl(HPET_T1_CFG);
		cfg &= ~HPET_TN_ENABLE;
		hpet_writel(cfg, HPET_T1_CFG);
		return;
	}

	if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
		hpet_rtc_int_freq = PIE_freq;
	else
		hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;

	/* It is more accurate to use the comparator value than current count.*/
	ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq;
	hpet_t1_cmp += ticks_per_int;
	hpet_writel(hpet_t1_cmp, HPET_T1_CMP);

	/*
	 * If the interrupt handler was delayed too long, the write above tries
	 * to schedule the next interrupt in the past and the hardware would
	 * not interrupt until the counter had wrapped around.
	 * So we have to check that the comparator wasn't set to a past time.
	 */
	cnt = hpet_readl(HPET_COUNTER);
	if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) {
		/* Count how many interrupt periods were skipped. */
		lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1;
		/* Make sure that, even with the time needed to execute
		 * this code, the next scheduled interrupt has been moved
		 * back to the future: */
		lost_ints++;

		hpet_t1_cmp += lost_ints * ticks_per_int;
		hpet_writel(hpet_t1_cmp, HPET_T1_CMP);

		/* Keep the periodic bookkeeping consistent with reality. */
		if (PIE_on)
			PIE_count += lost_ints;

		if (printk_ratelimit())
			printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n",
			       hpet_rtc_int_freq);
	}
}
1247
1248/*
1249 * The functions below are called from rtc driver.
1250 * Return 0 if HPET is not being used.
1251 * Otherwise do the necessary changes and return 1.
1252 */
1253int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
1254{
1255 if (!is_hpet_enabled())
1256 return 0;
1257
1258 if (bit_mask & RTC_UIE)
1259 UIE_on = 0;
1260 if (bit_mask & RTC_PIE)
1261 PIE_on = 0;
1262 if (bit_mask & RTC_AIE)
1263 AIE_on = 0;
1264
1265 return 1;
1266}
1267
1268int hpet_set_rtc_irq_bit(unsigned long bit_mask)
1269{
1270 int timer_init_reqd = 0;
1271
1272 if (!is_hpet_enabled())
1273 return 0;
1274
1275 if (!(PIE_on | AIE_on | UIE_on))
1276 timer_init_reqd = 1;
1277
1278 if (bit_mask & RTC_UIE) {
1279 UIE_on = 1;
1280 }
1281 if (bit_mask & RTC_PIE) {
1282 PIE_on = 1;
1283 PIE_count = 0;
1284 }
1285 if (bit_mask & RTC_AIE) {
1286 AIE_on = 1;
1287 }
1288
1289 if (timer_init_reqd)
1290 hpet_rtc_timer_init();
1291
1292 return 1;
1293}
1294
1295int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
1296{
1297 if (!is_hpet_enabled())
1298 return 0;
1299
1300 alarm_time.tm_hour = hrs;
1301 alarm_time.tm_min = min;
1302 alarm_time.tm_sec = sec;
1303
1304 return 1;
1305}
1306
1307int hpet_set_periodic_freq(unsigned long freq)
1308{
1309 if (!is_hpet_enabled())
1310 return 0;
1311
1312 PIE_freq = freq;
1313 PIE_count = 0;
1314
1315 return 1;
1316}
1317
/*
 * Called by the RTC driver when it suspects a dropped interrupt; no
 * recovery work is done here -- just report whether the HPET emulation
 * is in use (1) or not (0).
 */
int hpet_rtc_dropped_irq(void)
{
	return is_hpet_enabled();
}
1325
/*
 * Timer 1 interrupt handler: emulate the RTC update, periodic and
 * alarm interrupts in software and forward them to the RTC driver's
 * real interrupt routine when one is due.
 */
irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
	struct rtc_time curr_time;
	unsigned long rtc_int_flag = 0;
	int call_rtc_interrupt = 0;

	/* Re-arm (or disable) timer 1 for the next emulated tick. */
	hpet_rtc_timer_reinit();

	/* Update and alarm emulation both need the current RTC time. */
	if (UIE_on | AIE_on) {
		rtc_get_rtc_time(&curr_time);
	}
	if (UIE_on) {
		/* Update interrupt: report once whenever the second changes. */
		if (curr_time.tm_sec != prev_update_sec) {
			/* Set update int info, call real rtc int routine */
			call_rtc_interrupt = 1;
			rtc_int_flag = RTC_UF;
			prev_update_sec = curr_time.tm_sec;
		}
	}
	if (PIE_on) {
		/* Periodic: this handler runs at hpet_rtc_int_freq; report
		 * every hpet_rtc_int_freq/PIE_freq invocations. */
		PIE_count++;
		if (PIE_count >= hpet_rtc_int_freq/PIE_freq) {
			/* Set periodic int info, call real rtc int routine */
			call_rtc_interrupt = 1;
			rtc_int_flag |= RTC_PF;
			PIE_count = 0;
		}
	}
	if (AIE_on) {
		/* Alarm: fire when the wall-clock h/m/s matches. */
		if ((curr_time.tm_sec == alarm_time.tm_sec) &&
		    (curr_time.tm_min == alarm_time.tm_min) &&
		    (curr_time.tm_hour == alarm_time.tm_hour)) {
			/* Set alarm int info, call real rtc int routine */
			call_rtc_interrupt = 1;
			rtc_int_flag |= RTC_AF;
		}
	}
	if (call_rtc_interrupt) {
		rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
		rtc_interrupt(rtc_int_flag, dev_id);
	}
	return IRQ_HANDLED;
}
1369#endif
1370
Linus Torvalds1da177e2005-04-16 15:20:36 -07001371static int __init nohpet_setup(char *s)
1372{
1373 nohpet = 1;
OGAWA Hirofumi9b410462006-03-31 02:30:33 -08001374 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375}
1376
1377__setup("nohpet", nohpet_setup);
1378
Andi Kleen7fd67842006-02-16 23:42:07 +01001379int __init notsc_setup(char *s)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380{
1381 notsc = 1;
OGAWA Hirofumi9b410462006-03-31 02:30:33 -08001382 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383}
1384
1385__setup("notsc", notsc_setup);