| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 1 | #include <linux/kernel.h> | 
| Alok Kataria | 0ef9553 | 2008-07-01 11:43:18 -0700 | [diff] [blame] | 2 | #include <linux/sched.h> | 
|  | 3 | #include <linux/init.h> | 
|  | 4 | #include <linux/module.h> | 
|  | 5 | #include <linux/timer.h> | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 6 | #include <linux/acpi_pmtmr.h> | 
| Alok Kataria | 2dbe06f | 2008-07-01 11:43:31 -0700 | [diff] [blame] | 7 | #include <linux/cpufreq.h> | 
| Alok Kataria | 8fbbc4b | 2008-07-01 11:43:34 -0700 | [diff] [blame] | 8 | #include <linux/dmi.h> | 
|  | 9 | #include <linux/delay.h> | 
|  | 10 | #include <linux/clocksource.h> | 
|  | 11 | #include <linux/percpu.h> | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 12 |  | 
|  | 13 | #include <asm/hpet.h> | 
| Alok Kataria | 8fbbc4b | 2008-07-01 11:43:34 -0700 | [diff] [blame] | 14 | #include <asm/timer.h> | 
|  | 15 | #include <asm/vgtod.h> | 
|  | 16 | #include <asm/time.h> | 
|  | 17 | #include <asm/delay.h> | 
| Alok Kataria | 88b094f | 2008-10-27 10:41:46 -0700 | [diff] [blame] | 18 | #include <asm/hypervisor.h> | 
| Alok Kataria | 0ef9553 | 2008-07-01 11:43:18 -0700 | [diff] [blame] | 19 |  | 
/* CPU clock frequency in kHz; recalibrate_cpu_khz() sets it from tsc_khz. */
unsigned int cpu_khz;
EXPORT_SYMBOL(cpu_khz);
/* TSC frequency in kHz; rescaled by the cpufreq notifier on frequency changes. */
unsigned int tsc_khz;
EXPORT_SYMBOL(tsc_khz);
|  | 24 |  | 
/*
 * TSC can be unstable due to cpufreq or due to unsynced TSCs.
 * The flag is exposed to other code through check_tsc_unstable().
 */
static int tsc_unstable;

/*
 * native_sched_clock() is called before tsc_init(), so
 * we must start with the TSC soft disabled to prevent
 * erroneous rdtsc usage on !cpu_has_tsc processors.
 * Any non-zero value makes native_sched_clock() fall back to jiffies.
 */
static int tsc_disabled = -1;

/* Set to 1 when the kernel is booted with "tsc=reliable". */
static int tsc_clocksource_reliable;
| Alok Kataria | 0ef9553 | 2008-07-01 11:43:18 -0700 | [diff] [blame] | 36 | /* | 
|  | 37 | * Scheduler clock - returns current time in nanosec units. | 
|  | 38 | */ | 
|  | 39 | u64 native_sched_clock(void) | 
|  | 40 | { | 
|  | 41 | u64 this_offset; | 
|  | 42 |  | 
|  | 43 | /* | 
|  | 44 | * Fall back to jiffies if there's no TSC available: | 
|  | 45 | * ( But note that we still use it if the TSC is marked | 
|  | 46 | *   unstable. We do this because unlike Time Of Day, | 
|  | 47 | *   the scheduler clock tolerates small errors and it's | 
|  | 48 | *   very important for it to be as fast as the platform | 
|  | 49 | *   can achive it. ) | 
|  | 50 | */ | 
|  | 51 | if (unlikely(tsc_disabled)) { | 
|  | 52 | /* No locking but a rare wrong value is not a big deal: */ | 
|  | 53 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); | 
|  | 54 | } | 
|  | 55 |  | 
|  | 56 | /* read the Time Stamp Counter: */ | 
|  | 57 | rdtscll(this_offset); | 
|  | 58 |  | 
|  | 59 | /* return the value in ns */ | 
| Ingo Molnar | 7cbaef9 | 2008-11-08 17:05:38 +0100 | [diff] [blame] | 60 | return __cycles_2_ns(this_offset); | 
| Alok Kataria | 0ef9553 | 2008-07-01 11:43:18 -0700 | [diff] [blame] | 61 | } | 
|  | 62 |  | 
/*
 * We need to define a real function for sched_clock, to override the
 * weak default version.
 *
 * With CONFIG_PARAVIRT the call is forwarded to paravirt_sched_clock();
 * otherwise sched_clock is emitted as a symbol alias of
 * native_sched_clock().
 */
#ifdef CONFIG_PARAVIRT
unsigned long long sched_clock(void)
{
	return paravirt_sched_clock();
}
#else
unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));
#endif
|  | 74 |  | 
/* Return the current value of the tsc_unstable flag (non-zero: unstable). */
int check_tsc_unstable(void)
{
	return tsc_unstable;
}
EXPORT_SYMBOL_GPL(check_tsc_unstable);
|  | 80 |  | 
#ifdef CONFIG_X86_TSC
/*
 * "notsc" boot parameter handler for kernels built with CONFIG_X86_TSC.
 * Warns that the TSC cannot be disabled completely and soft-disables it
 * by setting tsc_disabled. Always returns 1.
 */
int __init notsc_setup(char *str)
{
	printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
			"cannot disable TSC completely.\n");
	tsc_disabled = 1;
	return 1;
}
#else
/*
 * disable flag for tsc. Takes effect by clearing the TSC cpu flag
 * in cpu/common.c
 */
int __init notsc_setup(char *str)
{
	setup_clear_cpu_cap(X86_FEATURE_TSC);
	return 1;
}
#endif

__setup("notsc", notsc_setup);
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 102 |  | 
| Alok Kataria | 395628e | 2008-10-24 17:22:01 -0700 | [diff] [blame] | 103 | static int __init tsc_setup(char *str) | 
|  | 104 | { | 
|  | 105 | if (!strcmp(str, "reliable")) | 
|  | 106 | tsc_clocksource_reliable = 1; | 
|  | 107 | return 1; | 
|  | 108 | } | 
|  | 109 |  | 
|  | 110 | __setup("tsc=", tsc_setup); | 
|  | 111 |  | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 112 | #define MAX_RETRIES     5 | 
|  | 113 | #define SMI_TRESHOLD    50000 | 
|  | 114 |  | 
|  | 115 | /* | 
|  | 116 | * Read TSC and the reference counters. Take care of SMI disturbance | 
|  | 117 | */ | 
| Thomas Gleixner | 827014b | 2008-09-04 15:18:53 +0000 | [diff] [blame] | 118 | static u64 tsc_read_refs(u64 *p, int hpet) | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 119 | { | 
|  | 120 | u64 t1, t2; | 
|  | 121 | int i; | 
|  | 122 |  | 
|  | 123 | for (i = 0; i < MAX_RETRIES; i++) { | 
|  | 124 | t1 = get_cycles(); | 
|  | 125 | if (hpet) | 
| Thomas Gleixner | 827014b | 2008-09-04 15:18:53 +0000 | [diff] [blame] | 126 | *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 127 | else | 
| Thomas Gleixner | 827014b | 2008-09-04 15:18:53 +0000 | [diff] [blame] | 128 | *p = acpi_pm_read_early(); | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 129 | t2 = get_cycles(); | 
|  | 130 | if ((t2 - t1) < SMI_TRESHOLD) | 
|  | 131 | return t2; | 
|  | 132 | } | 
|  | 133 | return ULLONG_MAX; | 
|  | 134 | } | 
|  | 135 |  | 
| Linus Torvalds | ec0c15a | 2008-09-03 07:30:13 -0700 | [diff] [blame] | 136 | /* | 
| Thomas Gleixner | d683ef7 | 2008-09-04 15:18:48 +0000 | [diff] [blame] | 137 | * Calculate the TSC frequency from HPET reference | 
|  | 138 | */ | 
|  | 139 | static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) | 
|  | 140 | { | 
|  | 141 | u64 tmp; | 
|  | 142 |  | 
|  | 143 | if (hpet2 < hpet1) | 
|  | 144 | hpet2 += 0x100000000ULL; | 
|  | 145 | hpet2 -= hpet1; | 
|  | 146 | tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); | 
|  | 147 | do_div(tmp, 1000000); | 
|  | 148 | do_div(deltatsc, tmp); | 
|  | 149 |  | 
|  | 150 | return (unsigned long) deltatsc; | 
|  | 151 | } | 
|  | 152 |  | 
|  | 153 | /* | 
|  | 154 | * Calculate the TSC frequency from PMTimer reference | 
|  | 155 | */ | 
|  | 156 | static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) | 
|  | 157 | { | 
|  | 158 | u64 tmp; | 
|  | 159 |  | 
|  | 160 | if (!pm1 && !pm2) | 
|  | 161 | return ULONG_MAX; | 
|  | 162 |  | 
|  | 163 | if (pm2 < pm1) | 
|  | 164 | pm2 += (u64)ACPI_PM_OVRRUN; | 
|  | 165 | pm2 -= pm1; | 
|  | 166 | tmp = pm2 * 1000000000LL; | 
|  | 167 | do_div(tmp, PMTMR_TICKS_PER_SEC); | 
|  | 168 | do_div(deltatsc, tmp); | 
|  | 169 |  | 
|  | 170 | return (unsigned long) deltatsc; | 
|  | 171 | } | 
|  | 172 |  | 
/*
 * Default PIT calibration window: length in ms, the matching PIT start
 * count, and the minimum number of polling iterations that must fit in
 * the window for the result to be accepted.
 */
#define CAL_MS		10
#define CAL_LATCH	(CLOCK_TICK_RATE / (1000 / CAL_MS))
#define CAL_PIT_LOOPS	1000

/*
 * Longer window, used by native_calibrate_tsc() after the PIT
 * calibration has failed repeatedly.
 */
#define CAL2_MS		50
#define CAL2_LATCH	(CLOCK_TICK_RATE / (1000 / CAL2_MS))
#define CAL2_PIT_LOOPS	5000
| Thomas Gleixner | cce3e05 | 2008-09-04 15:18:44 +0000 | [diff] [blame] | 181 |  | 
/*
 * Try to calibrate the TSC against the Programmable
 * Interrupt Timer and return the frequency of the TSC
 * in kHz.
 *
 * @latch:   start count loaded into PIT channel 2
 * @ms:      duration in milliseconds that @latch represents; the
 *           measured TSC delta is divided by this value
 * @loopmin: minimum number of polling iterations that must complete
 *           before the countdown ends for the result to be trusted
 *
 * Return ULONG_MAX on failure to calibrate.
 */
static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
{
	u64 tsc, t1, t2, delta;
	unsigned long tscmin, tscmax;
	int pitcnt;

	/* Set the Gate high, disable speaker */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/*
	 * Setup CTC channel 2* for mode 0, (interrupt on terminal
	 * count mode), binary count. Set the latch register to the
	 * caller-supplied count (LSB then MSB) to begin countdown.
	 */
	outb(0xb0, 0x43);
	outb(latch & 0xff, 0x42);
	outb(latch >> 8, 0x42);

	tsc = t1 = t2 = get_cycles();

	pitcnt = 0;
	tscmax = 0;
	tscmin = ULONG_MAX;
	/*
	 * Poll until bit 5 of port 0x61 becomes set, tracking the
	 * smallest and largest TSC delta between consecutive polls.
	 */
	while ((inb(0x61) & 0x20) == 0) {
		t2 = get_cycles();
		delta = t2 - tsc;
		tsc = t2;
		if ((unsigned long) delta < tscmin)
			tscmin = (unsigned int) delta;
		if ((unsigned long) delta > tscmax)
			tscmax = (unsigned int) delta;
		pitcnt++;
	}

	/*
	 * Sanity checks:
	 *
	 * If we were not able to read the PIT more than loopmin
	 * times, then we have been hit by a massive SMI
	 *
	 * If the maximum is 10 times larger than the minimum,
	 * then we got hit by an SMI as well.
	 */
	if (pitcnt < loopmin || tscmax > 10 * tscmin)
		return ULONG_MAX;

	/* Calculate the PIT value: TSC cycles per millisecond, i.e. kHz */
	delta = t2 - t1;
	do_div(delta, ms);
	return delta;
}
|  | 240 |  | 
/*
 * This reads the current MSB of the PIT counter, and
 * checks if we are running on sufficiently fast and
 * non-virtualized hardware.
 *
 * Our expectations are:
 *
 *  - the PIT is running at roughly 1.19MHz
 *
 *  - each IO is going to take about 1us on real hardware,
 *    but we allow it to be much faster (by a factor of 10) or
 *    _slightly_ slower (ie we allow up to a 2us read+counter
 *    update - anything else implies an unacceptably slow CPU
 *    or PIT for the fast calibration to work).
 *
 *  - with 256 PIT ticks to read the value, we have 214us to
 *    see the same MSB (and overhead like doing a single TSC
 *    read per MSB value etc).
 *
 *  - We're doing 2 reads per loop (LSB, MSB), and we expect
 *    them each to take about a microsecond on real hardware.
 *    So we expect a count value of around 100. But we'll be
 *    generous, and accept anything over 50.
 *
 *  - if the PIT is stuck, and we see *many* more reads, we
 *    return early (and the next caller of pit_expect_msb()
 *    will then consider it a failure when it doesn't see the
 *    next expected value).
 *
 * These expectations mean that we know that we have seen the
 * transition from one expected value to another with a fairly
 * high accuracy, and we didn't miss any events. We can thus
 * use the TSC value at the transitions to calculate a pretty
 * good value for the TSC frequency.
 *
 * Returns non-zero if @val was observed as the MSB on more than 50
 * consecutive read pairs, zero otherwise.
 */
static inline int pit_expect_msb(unsigned char val)
{
	int reads = 0;

	while (reads < 50000) {
		/* First read is the LSB, which is ignored */
		inb(0x42);
		/* Second read is the MSB; stop as soon as it changes */
		if (inb(0x42) != val)
			break;
		reads++;
	}

	return reads > 50;
}
|  | 288 |  | 
|  | 289 | /* | 
|  | 290 | * How many MSB values do we want to see? We aim for a | 
|  | 291 | * 15ms calibration, which assuming a 2us counter read | 
|  | 292 | * error should give us roughly 150 ppm precision for | 
|  | 293 | * the calibration. | 
|  | 294 | */ | 
|  | 295 | #define QUICK_PIT_MS 15 | 
|  | 296 | #define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) | 
|  | 297 |  | 
|  | 298 | static unsigned long quick_pit_calibrate(void) | 
|  | 299 | { | 
|  | 300 | /* Set the Gate high, disable speaker */ | 
|  | 301 | outb((inb(0x61) & ~0x02) | 0x01, 0x61); | 
|  | 302 |  | 
|  | 303 | /* | 
|  | 304 | * Counter 2, mode 0 (one-shot), binary count | 
|  | 305 | * | 
|  | 306 | * NOTE! Mode 2 decrements by two (and then the | 
|  | 307 | * output is flipped each time, giving the same | 
|  | 308 | * final output frequency as a decrement-by-one), | 
|  | 309 | * so mode 0 is much better when looking at the | 
|  | 310 | * individual counts. | 
|  | 311 | */ | 
|  | 312 | outb(0xb0, 0x43); | 
|  | 313 |  | 
|  | 314 | /* Start at 0xffff */ | 
|  | 315 | outb(0xff, 0x42); | 
|  | 316 | outb(0xff, 0x42); | 
|  | 317 |  | 
|  | 318 | if (pit_expect_msb(0xff)) { | 
|  | 319 | int i; | 
|  | 320 | u64 t1, t2, delta; | 
|  | 321 | unsigned char expect = 0xfe; | 
|  | 322 |  | 
|  | 323 | t1 = get_cycles(); | 
|  | 324 | for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) { | 
|  | 325 | if (!pit_expect_msb(expect)) | 
|  | 326 | goto failed; | 
|  | 327 | } | 
|  | 328 | t2 = get_cycles(); | 
|  | 329 |  | 
|  | 330 | /* | 
| Ingo Molnar | 4156e9a | 2008-09-04 22:47:47 +0200 | [diff] [blame] | 331 | * Make sure we can rely on the second TSC timestamp: | 
|  | 332 | */ | 
| Ingo Molnar | 5df4551 | 2008-09-06 23:55:40 +0200 | [diff] [blame] | 333 | if (!pit_expect_msb(expect)) | 
| Ingo Molnar | 4156e9a | 2008-09-04 22:47:47 +0200 | [diff] [blame] | 334 | goto failed; | 
|  | 335 |  | 
|  | 336 | /* | 
| Linus Torvalds | 6ac40ed | 2008-09-04 10:41:22 -0700 | [diff] [blame] | 337 | * Ok, if we get here, then we've seen the | 
|  | 338 | * MSB of the PIT decrement QUICK_PIT_ITERATIONS | 
|  | 339 | * times, and each MSB had many hits, so we never | 
|  | 340 | * had any sudden jumps. | 
|  | 341 | * | 
|  | 342 | * As a result, we can depend on there not being | 
|  | 343 | * any odd delays anywhere, and the TSC reads are | 
|  | 344 | * reliable. | 
|  | 345 | * | 
|  | 346 | * kHz = ticks / time-in-seconds / 1000; | 
|  | 347 | * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000 | 
|  | 348 | * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000) | 
|  | 349 | */ | 
|  | 350 | delta = (t2 - t1)*PIT_TICK_RATE; | 
|  | 351 | do_div(delta, QUICK_PIT_ITERATIONS*256*1000); | 
|  | 352 | printk("Fast TSC calibration using PIT\n"); | 
|  | 353 | return delta; | 
|  | 354 | } | 
|  | 355 | failed: | 
|  | 356 | return 0; | 
|  | 357 | } | 
| Linus Torvalds | ec0c15a | 2008-09-03 07:30:13 -0700 | [diff] [blame] | 358 |  | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 359 | /** | 
| Alok Kataria | e93ef94 | 2008-07-01 11:43:36 -0700 | [diff] [blame] | 360 | * native_calibrate_tsc - calibrate the tsc on boot | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 361 | */ | 
| Alok Kataria | e93ef94 | 2008-07-01 11:43:36 -0700 | [diff] [blame] | 362 | unsigned long native_calibrate_tsc(void) | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 363 | { | 
| Thomas Gleixner | 827014b | 2008-09-04 15:18:53 +0000 | [diff] [blame] | 364 | u64 tsc1, tsc2, delta, ref1, ref2; | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 365 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; | 
| Alok Kataria | 88b094f | 2008-10-27 10:41:46 -0700 | [diff] [blame] | 366 | unsigned long flags, latch, ms, fast_calibrate, tsc_khz; | 
| Thomas Gleixner | a977c40 | 2008-09-04 15:18:59 +0000 | [diff] [blame] | 367 | int hpet = is_hpet_enabled(), i, loopmin; | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 368 |  | 
| Alok Kataria | 88b094f | 2008-10-27 10:41:46 -0700 | [diff] [blame] | 369 | tsc_khz = get_hypervisor_tsc_freq(); | 
|  | 370 | if (tsc_khz) { | 
|  | 371 | printk(KERN_INFO "TSC: Frequency read from the hypervisor\n"); | 
|  | 372 | return tsc_khz; | 
|  | 373 | } | 
|  | 374 |  | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 375 | local_irq_save(flags); | 
| Linus Torvalds | 6ac40ed | 2008-09-04 10:41:22 -0700 | [diff] [blame] | 376 | fast_calibrate = quick_pit_calibrate(); | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 377 | local_irq_restore(flags); | 
| Linus Torvalds | 6ac40ed | 2008-09-04 10:41:22 -0700 | [diff] [blame] | 378 | if (fast_calibrate) | 
|  | 379 | return fast_calibrate; | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 380 |  | 
|  | 381 | /* | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 382 | * Run 5 calibration loops to get the lowest frequency value | 
|  | 383 | * (the best estimate). We use two different calibration modes | 
|  | 384 | * here: | 
|  | 385 | * | 
|  | 386 | * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and | 
|  | 387 | * load a timeout of 50ms. We read the time right after we | 
|  | 388 | * started the timer and wait until the PIT count down reaches | 
|  | 389 | * zero. In each wait loop iteration we read the TSC and check | 
|  | 390 | * the delta to the previous read. We keep track of the min | 
|  | 391 | * and max values of that delta. The delta is mostly defined | 
|  | 392 | * by the IO time of the PIT access, so we can detect when a | 
|  | 393 | * SMI/SMM disturbance happend between the two reads. If the | 
|  | 394 | * maximum time is significantly larger than the minimum time, | 
|  | 395 | * then we discard the result and have another try. | 
|  | 396 | * | 
|  | 397 | * 2) Reference counter. If available we use the HPET or the | 
|  | 398 | * PMTIMER as a reference to check the sanity of that value. | 
|  | 399 | * We use separate TSC readouts and check inside of the | 
|  | 400 | * reference read for a SMI/SMM disturbance. We dicard | 
|  | 401 | * disturbed values here as well. We do that around the PIT | 
|  | 402 | * calibration delay loop as we have to wait for a certain | 
|  | 403 | * amount of time anyway. | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 404 | */ | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 405 |  | 
| Thomas Gleixner | a977c40 | 2008-09-04 15:18:59 +0000 | [diff] [blame] | 406 | /* Preset PIT loop values */ | 
|  | 407 | latch = CAL_LATCH; | 
|  | 408 | ms = CAL_MS; | 
|  | 409 | loopmin = CAL_PIT_LOOPS; | 
|  | 410 |  | 
|  | 411 | for (i = 0; i < 3; i++) { | 
| Linus Torvalds | ec0c15a | 2008-09-03 07:30:13 -0700 | [diff] [blame] | 412 | unsigned long tsc_pit_khz; | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 413 |  | 
|  | 414 | /* | 
|  | 415 | * Read the start value and the reference count of | 
| Linus Torvalds | ec0c15a | 2008-09-03 07:30:13 -0700 | [diff] [blame] | 416 | * hpet/pmtimer when available. Then do the PIT | 
|  | 417 | * calibration, which will take at least 50ms, and | 
|  | 418 | * read the end value. | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 419 | */ | 
| Linus Torvalds | ec0c15a | 2008-09-03 07:30:13 -0700 | [diff] [blame] | 420 | local_irq_save(flags); | 
| Thomas Gleixner | 827014b | 2008-09-04 15:18:53 +0000 | [diff] [blame] | 421 | tsc1 = tsc_read_refs(&ref1, hpet); | 
| Thomas Gleixner | a977c40 | 2008-09-04 15:18:59 +0000 | [diff] [blame] | 422 | tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin); | 
| Thomas Gleixner | 827014b | 2008-09-04 15:18:53 +0000 | [diff] [blame] | 423 | tsc2 = tsc_read_refs(&ref2, hpet); | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 424 | local_irq_restore(flags); | 
|  | 425 |  | 
| Linus Torvalds | ec0c15a | 2008-09-03 07:30:13 -0700 | [diff] [blame] | 426 | /* Pick the lowest PIT TSC calibration so far */ | 
|  | 427 | tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 428 |  | 
|  | 429 | /* hpet or pmtimer available ? */ | 
| Thomas Gleixner | 827014b | 2008-09-04 15:18:53 +0000 | [diff] [blame] | 430 | if (!hpet && !ref1 && !ref2) | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 431 | continue; | 
|  | 432 |  | 
|  | 433 | /* Check, whether the sampling was disturbed by an SMI */ | 
|  | 434 | if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) | 
|  | 435 | continue; | 
|  | 436 |  | 
|  | 437 | tsc2 = (tsc2 - tsc1) * 1000000LL; | 
| Thomas Gleixner | d683ef7 | 2008-09-04 15:18:48 +0000 | [diff] [blame] | 438 | if (hpet) | 
| Thomas Gleixner | 827014b | 2008-09-04 15:18:53 +0000 | [diff] [blame] | 439 | tsc2 = calc_hpet_ref(tsc2, ref1, ref2); | 
| Thomas Gleixner | d683ef7 | 2008-09-04 15:18:48 +0000 | [diff] [blame] | 440 | else | 
| Thomas Gleixner | 827014b | 2008-09-04 15:18:53 +0000 | [diff] [blame] | 441 | tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 442 |  | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 443 | tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); | 
| Thomas Gleixner | a977c40 | 2008-09-04 15:18:59 +0000 | [diff] [blame] | 444 |  | 
|  | 445 | /* Check the reference deviation */ | 
|  | 446 | delta = ((u64) tsc_pit_min) * 100; | 
|  | 447 | do_div(delta, tsc_ref_min); | 
|  | 448 |  | 
|  | 449 | /* | 
|  | 450 | * If both calibration results are inside a 10% window | 
|  | 451 | * then we can be sure, that the calibration | 
|  | 452 | * succeeded. We break out of the loop right away. We | 
|  | 453 | * use the reference value, as it is more precise. | 
|  | 454 | */ | 
|  | 455 | if (delta >= 90 && delta <= 110) { | 
|  | 456 | printk(KERN_INFO | 
|  | 457 | "TSC: PIT calibration matches %s. %d loops\n", | 
|  | 458 | hpet ? "HPET" : "PMTIMER", i + 1); | 
|  | 459 | return tsc_ref_min; | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 460 | } | 
|  | 461 |  | 
| Thomas Gleixner | a977c40 | 2008-09-04 15:18:59 +0000 | [diff] [blame] | 462 | /* | 
|  | 463 | * Check whether PIT failed more than once. This | 
|  | 464 | * happens in virtualized environments. We need to | 
|  | 465 | * give the virtual PC a slightly longer timeframe for | 
|  | 466 | * the HPET/PMTIMER to make the result precise. | 
|  | 467 | */ | 
|  | 468 | if (i == 1 && tsc_pit_min == ULONG_MAX) { | 
|  | 469 | latch = CAL2_LATCH; | 
|  | 470 | ms = CAL2_MS; | 
|  | 471 | loopmin = CAL2_PIT_LOOPS; | 
|  | 472 | } | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 473 | } | 
|  | 474 |  | 
|  | 475 | /* | 
|  | 476 | * Now check the results. | 
|  | 477 | */ | 
|  | 478 | if (tsc_pit_min == ULONG_MAX) { | 
|  | 479 | /* PIT gave no useful value */ | 
| Alok N Kataria | de014d6 | 2008-09-03 18:18:01 -0700 | [diff] [blame] | 480 | printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 481 |  | 
|  | 482 | /* We don't have an alternative source, disable TSC */ | 
| Thomas Gleixner | 827014b | 2008-09-04 15:18:53 +0000 | [diff] [blame] | 483 | if (!hpet && !ref1 && !ref2) { | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 484 | printk("TSC: No reference (HPET/PMTIMER) available\n"); | 
|  | 485 | return 0; | 
|  | 486 | } | 
|  | 487 |  | 
|  | 488 | /* The alternative source failed as well, disable TSC */ | 
|  | 489 | if (tsc_ref_min == ULONG_MAX) { | 
|  | 490 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " | 
| Thomas Gleixner | a977c40 | 2008-09-04 15:18:59 +0000 | [diff] [blame] | 491 | "failed.\n"); | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 492 | return 0; | 
|  | 493 | } | 
|  | 494 |  | 
|  | 495 | /* Use the alternative source */ | 
|  | 496 | printk(KERN_INFO "TSC: using %s reference calibration\n", | 
|  | 497 | hpet ? "HPET" : "PMTIMER"); | 
|  | 498 |  | 
|  | 499 | return tsc_ref_min; | 
|  | 500 | } | 
|  | 501 |  | 
|  | 502 | /* We don't have an alternative source, use the PIT calibration value */ | 
| Thomas Gleixner | 827014b | 2008-09-04 15:18:53 +0000 | [diff] [blame] | 503 | if (!hpet && !ref1 && !ref2) { | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 504 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); | 
|  | 505 | return tsc_pit_min; | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 506 | } | 
|  | 507 |  | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 508 | /* The alternative source failed, use the PIT calibration value */ | 
|  | 509 | if (tsc_ref_min == ULONG_MAX) { | 
| Thomas Gleixner | a977c40 | 2008-09-04 15:18:59 +0000 | [diff] [blame] | 510 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " | 
|  | 511 | "Using PIT calibration\n"); | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 512 | return tsc_pit_min; | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 513 | } | 
|  | 514 |  | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 515 | /* | 
|  | 516 | * The calibration values differ too much. In doubt, we use | 
|  | 517 | * the PIT value as we know that there are PMTIMERs around | 
| Thomas Gleixner | a977c40 | 2008-09-04 15:18:59 +0000 | [diff] [blame] | 518 | * running at double speed. At least we let the user know: | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 519 | */ | 
| Thomas Gleixner | a977c40 | 2008-09-04 15:18:59 +0000 | [diff] [blame] | 520 | printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", | 
|  | 521 | hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); | 
| Thomas Gleixner | fbb16e2 | 2008-09-03 00:54:47 +0200 | [diff] [blame] | 522 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); | 
|  | 523 | return tsc_pit_min; | 
| Alok Kataria | bfc0f59 | 2008-07-01 11:43:24 -0700 | [diff] [blame] | 524 | } | 
|  | 525 |  | 
#ifdef CONFIG_X86_32
/*
 * Only called from the Powernow K7 cpu freq driver.
 *
 * On uniprocessor builds with a TSC, re-runs calibrate_tsc(), stores
 * the result in tsc_khz and cpu_khz, rescales CPU 0's loops_per_jiffy
 * from the old to the new frequency and returns 0. Returns -ENODEV on
 * SMP builds or when the CPU has no TSC.
 */
int recalibrate_cpu_khz(void)
{
#ifndef CONFIG_SMP
	unsigned long prev_khz = cpu_khz;

	if (!cpu_has_tsc)
		return -ENODEV;

	tsc_khz = calibrate_tsc();
	cpu_khz = tsc_khz;
	cpu_data(0).loops_per_jiffy =
		cpufreq_scale(cpu_data(0).loops_per_jiffy, prev_khz, cpu_khz);

	return 0;
#else
	return -ENODEV;
#endif
}

EXPORT_SYMBOL(recalibrate_cpu_khz);

#endif /* CONFIG_X86_32 */
| Alok Kataria | 2dbe06f | 2008-07-01 11:43:31 -0700 | [diff] [blame] | 550 |  | 
/* Accelerators for sched_clock()
 * convert from cycles(64bits) => nanoseconds (64bits)
 *  basic equation:
 *              ns = cycles / (freq / ns_per_sec)
 *              ns = cycles * (ns_per_sec / freq)
 *              ns = cycles * (10^9 / (cpu_khz * 10^3))
 *              ns = cycles * (10^6 / cpu_khz)
 *
 *      Then we use scaling math (suggested by george@mvista.com) to get:
 *              ns = cycles * (10^6 * SC / cpu_khz) / SC
 *              ns = cycles * cyc2ns_scale / SC
 *
 *      And since SC is a constant power of two, we can convert the div
 *  into a shift.
 *
 *  We can use khz divisor instead of mhz to keep a better precision, since
 *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
 *  (mathieu.desnoyers@polymtl.ca)
 *
 *                      -johnstul@us.ibm.com "math is hard, lets go shopping!"
 */

/* Per-CPU cycles-to-ns scale factor, written by set_cyc2ns_scale(). */
DEFINE_PER_CPU(unsigned long, cyc2ns);
|  | 574 |  | 
| Alok Kataria | 8fbbc4b | 2008-07-01 11:43:34 -0700 | [diff] [blame] | 575 | static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) | 
| Alok Kataria | 2dbe06f | 2008-07-01 11:43:31 -0700 | [diff] [blame] | 576 | { | 
|  | 577 | unsigned long long tsc_now, ns_now; | 
|  | 578 | unsigned long flags, *scale; | 
|  | 579 |  | 
|  | 580 | local_irq_save(flags); | 
|  | 581 | sched_clock_idle_sleep_event(); | 
|  | 582 |  | 
|  | 583 | scale = &per_cpu(cyc2ns, cpu); | 
|  | 584 |  | 
|  | 585 | rdtscll(tsc_now); | 
|  | 586 | ns_now = __cycles_2_ns(tsc_now); | 
|  | 587 |  | 
|  | 588 | if (cpu_khz) | 
|  | 589 | *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; | 
|  | 590 |  | 
|  | 591 | sched_clock_idle_wakeup_event(0); | 
|  | 592 | local_irq_restore(flags); | 
|  | 593 | } | 
|  | 594 |  | 
|  | 595 | #ifdef CONFIG_CPU_FREQ | 
|  | 596 |  | 
|  | 597 | /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency | 
|  | 598 | * changes. | 
|  | 599 | * | 
|  | 600 | * RED-PEN: On SMP we assume all CPUs run with the same frequency.  It's | 
|  | 601 | * not that important because current Opteron setups do not support | 
|  | 602 | * scaling on SMP anyroads. | 
|  | 603 | * | 
|  | 604 | * Should fix up last_tsc too. Currently gettimeofday in the | 
|  | 605 | * first tick after the change will be slightly wrong. | 
|  | 606 | */ | 
|  | 607 |  | 
|  | 608 | static unsigned int  ref_freq; | 
|  | 609 | static unsigned long loops_per_jiffy_ref; | 
|  | 610 | static unsigned long tsc_khz_ref; | 
|  | 611 |  | 
|  | 612 | static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | 
|  | 613 | void *data) | 
|  | 614 | { | 
|  | 615 | struct cpufreq_freqs *freq = data; | 
|  | 616 | unsigned long *lpj, dummy; | 
|  | 617 |  | 
|  | 618 | if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) | 
|  | 619 | return 0; | 
|  | 620 |  | 
|  | 621 | lpj = &dummy; | 
|  | 622 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | 
|  | 623 | #ifdef CONFIG_SMP | 
|  | 624 | lpj = &cpu_data(freq->cpu).loops_per_jiffy; | 
|  | 625 | #else | 
|  | 626 | lpj = &boot_cpu_data.loops_per_jiffy; | 
|  | 627 | #endif | 
|  | 628 |  | 
|  | 629 | if (!ref_freq) { | 
|  | 630 | ref_freq = freq->old; | 
|  | 631 | loops_per_jiffy_ref = *lpj; | 
|  | 632 | tsc_khz_ref = tsc_khz; | 
|  | 633 | } | 
|  | 634 | if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) || | 
|  | 635 | (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || | 
|  | 636 | (val == CPUFREQ_RESUMECHANGE)) { | 
|  | 637 | *lpj = 	cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); | 
|  | 638 |  | 
|  | 639 | tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); | 
|  | 640 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | 
|  | 641 | mark_tsc_unstable("cpufreq changes"); | 
|  | 642 | } | 
|  | 643 |  | 
| Peter Zijlstra | 52a8968 | 2008-08-25 13:35:06 +0200 | [diff] [blame] | 644 | set_cyc2ns_scale(tsc_khz, freq->cpu); | 
| Alok Kataria | 2dbe06f | 2008-07-01 11:43:31 -0700 | [diff] [blame] | 645 |  | 
|  | 646 | return 0; | 
|  | 647 | } | 
|  | 648 |  | 
|  | 649 | static struct notifier_block time_cpufreq_notifier_block = { | 
|  | 650 | .notifier_call  = time_cpufreq_notifier | 
|  | 651 | }; | 
|  | 652 |  | 
|  | 653 | static int __init cpufreq_tsc(void) | 
|  | 654 | { | 
| Linus Torvalds | 060700b | 2008-08-24 11:52:06 -0700 | [diff] [blame] | 655 | if (!cpu_has_tsc) | 
|  | 656 | return 0; | 
|  | 657 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | 
|  | 658 | return 0; | 
| Alok Kataria | 2dbe06f | 2008-07-01 11:43:31 -0700 | [diff] [blame] | 659 | cpufreq_register_notifier(&time_cpufreq_notifier_block, | 
|  | 660 | CPUFREQ_TRANSITION_NOTIFIER); | 
|  | 661 | return 0; | 
|  | 662 | } | 
|  | 663 |  | 
|  | 664 | core_initcall(cpufreq_tsc); | 
|  | 665 |  | 
|  | 666 | #endif /* CONFIG_CPU_FREQ */ | 
| Alok Kataria | 8fbbc4b | 2008-07-01 11:43:34 -0700 | [diff] [blame] | 667 |  | 
|  | 668 | /* clocksource code */ | 
|  | 669 |  | 
|  | 670 | static struct clocksource clocksource_tsc; | 
|  | 671 |  | 
|  | 672 | /* | 
|  | 673 | * We compare the TSC to the cycle_last value in the clocksource | 
|  | 674 | * structure to avoid a nasty time-warp. This can be observed in a | 
|  | 675 | * very small window right after one CPU updated cycle_last under | 
|  | 676 | * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which | 
|  | 677 | * is smaller than the cycle_last reference value due to a TSC which | 
|  | 678 | * is slighty behind. This delta is nowhere else observable, but in | 
|  | 679 | * that case it results in a forward time jump in the range of hours | 
|  | 680 | * due to the unsigned delta calculation of the time keeping core | 
|  | 681 | * code, which is necessary to support wrapping clocksources like pm | 
|  | 682 | * timer. | 
|  | 683 | */ | 
|  | 684 | static cycle_t read_tsc(void) | 
|  | 685 | { | 
|  | 686 | cycle_t ret = (cycle_t)get_cycles(); | 
|  | 687 |  | 
|  | 688 | return ret >= clocksource_tsc.cycle_last ? | 
|  | 689 | ret : clocksource_tsc.cycle_last; | 
|  | 690 | } | 
|  | 691 |  | 
#ifdef CONFIG_X86_64
/*
 * vsyscall variant of the TSC read: same clamp, but against the
 * cycle_last copy held in __vsyscall_gtod_data.
 */
static cycle_t __vsyscall_fn vread_tsc(void)
{
	cycle_t now = (cycle_t)vget_cycles();
	cycle_t last = __vsyscall_gtod_data.clock.cycle_last;

	if (now < last)
		return last;

	return now;
}
#endif
| Alok Kataria | 8fbbc4b | 2008-07-01 11:43:34 -0700 | [diff] [blame] | 701 |  | 
|  | 702 | static struct clocksource clocksource_tsc = { | 
|  | 703 | .name                   = "tsc", | 
|  | 704 | .rating                 = 300, | 
|  | 705 | .read                   = read_tsc, | 
|  | 706 | .mask                   = CLOCKSOURCE_MASK(64), | 
|  | 707 | .shift                  = 22, | 
|  | 708 | .flags                  = CLOCK_SOURCE_IS_CONTINUOUS | | 
|  | 709 | CLOCK_SOURCE_MUST_VERIFY, | 
|  | 710 | #ifdef CONFIG_X86_64 | 
|  | 711 | .vread                  = vread_tsc, | 
|  | 712 | #endif | 
|  | 713 | }; | 
|  | 714 |  | 
|  | 715 | void mark_tsc_unstable(char *reason) | 
|  | 716 | { | 
|  | 717 | if (!tsc_unstable) { | 
|  | 718 | tsc_unstable = 1; | 
|  | 719 | printk("Marking TSC unstable due to %s\n", reason); | 
|  | 720 | /* Change only the rating, when not registered */ | 
|  | 721 | if (clocksource_tsc.mult) | 
|  | 722 | clocksource_change_rating(&clocksource_tsc, 0); | 
|  | 723 | else | 
|  | 724 | clocksource_tsc.rating = 0; | 
|  | 725 | } | 
|  | 726 | } | 
|  | 727 |  | 
|  | 728 | EXPORT_SYMBOL_GPL(mark_tsc_unstable); | 
|  | 729 |  | 
|  | 730 | static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d) | 
|  | 731 | { | 
|  | 732 | printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", | 
|  | 733 | d->ident); | 
|  | 734 | tsc_unstable = 1; | 
|  | 735 | return 0; | 
|  | 736 | } | 
|  | 737 |  | 
|  | 738 | /* List of systems that have known TSC problems */ | 
|  | 739 | static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { | 
|  | 740 | { | 
|  | 741 | .callback = dmi_mark_tsc_unstable, | 
|  | 742 | .ident = "IBM Thinkpad 380XD", | 
|  | 743 | .matches = { | 
|  | 744 | DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), | 
|  | 745 | DMI_MATCH(DMI_BOARD_NAME, "2635FA0"), | 
|  | 746 | }, | 
|  | 747 | }, | 
|  | 748 | {} | 
|  | 749 | }; | 
|  | 750 |  | 
| Alok Kataria | 395628e | 2008-10-24 17:22:01 -0700 | [diff] [blame] | 751 | static void __init check_system_tsc_reliable(void) | 
| Alok Kataria | 8fbbc4b | 2008-07-01 11:43:34 -0700 | [diff] [blame] | 752 | { | 
| Alok Kataria | 395628e | 2008-10-24 17:22:01 -0700 | [diff] [blame] | 753 | #ifdef CONFIG_MGEODE_LX | 
|  | 754 | /* RTSC counts during suspend */ | 
|  | 755 | #define RTSC_SUSP 0x100 | 
| Alok Kataria | 8fbbc4b | 2008-07-01 11:43:34 -0700 | [diff] [blame] | 756 | unsigned long res_low, res_high; | 
|  | 757 |  | 
|  | 758 | rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); | 
| Alok Kataria | 395628e | 2008-10-24 17:22:01 -0700 | [diff] [blame] | 759 | /* Geode_LX - the OLPC CPU has a possibly a very reliable TSC */ | 
| Alok Kataria | 8fbbc4b | 2008-07-01 11:43:34 -0700 | [diff] [blame] | 760 | if (res_low & RTSC_SUSP) | 
| Alok Kataria | 395628e | 2008-10-24 17:22:01 -0700 | [diff] [blame] | 761 | tsc_clocksource_reliable = 1; | 
| Alok Kataria | 8fbbc4b | 2008-07-01 11:43:34 -0700 | [diff] [blame] | 762 | #endif | 
| Alok Kataria | 395628e | 2008-10-24 17:22:01 -0700 | [diff] [blame] | 763 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) | 
|  | 764 | tsc_clocksource_reliable = 1; | 
|  | 765 | } | 
| Alok Kataria | 8fbbc4b | 2008-07-01 11:43:34 -0700 | [diff] [blame] | 766 |  | 
|  | 767 | /* | 
|  | 768 | * Make an educated guess if the TSC is trustworthy and synchronized | 
|  | 769 | * over all CPUs. | 
|  | 770 | */ | 
|  | 771 | __cpuinit int unsynchronized_tsc(void) | 
|  | 772 | { | 
|  | 773 | if (!cpu_has_tsc || tsc_unstable) | 
|  | 774 | return 1; | 
|  | 775 |  | 
| James Bottomley | 017d9d2 | 2008-10-30 16:05:39 -0500 | [diff] [blame] | 776 | #ifdef CONFIG_X86_SMP | 
| Alok Kataria | 8fbbc4b | 2008-07-01 11:43:34 -0700 | [diff] [blame] | 777 | if (apic_is_clustered_box()) | 
|  | 778 | return 1; | 
|  | 779 | #endif | 
|  | 780 |  | 
|  | 781 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | 
|  | 782 | return 0; | 
|  | 783 | /* | 
|  | 784 | * Intel systems are normally all synchronized. | 
|  | 785 | * Exceptions must mark TSC as unstable: | 
|  | 786 | */ | 
|  | 787 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { | 
|  | 788 | /* assume multi socket systems are not synchronized: */ | 
|  | 789 | if (num_possible_cpus() > 1) | 
|  | 790 | tsc_unstable = 1; | 
|  | 791 | } | 
|  | 792 |  | 
|  | 793 | return tsc_unstable; | 
|  | 794 | } | 
|  | 795 |  | 
|  | 796 | static void __init init_tsc_clocksource(void) | 
|  | 797 | { | 
|  | 798 | clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, | 
|  | 799 | clocksource_tsc.shift); | 
| Alok Kataria | 395628e | 2008-10-24 17:22:01 -0700 | [diff] [blame] | 800 | if (tsc_clocksource_reliable) | 
|  | 801 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; | 
| Alok Kataria | 8fbbc4b | 2008-07-01 11:43:34 -0700 | [diff] [blame] | 802 | /* lower the rating if we already know its unstable: */ | 
|  | 803 | if (check_tsc_unstable()) { | 
|  | 804 | clocksource_tsc.rating = 0; | 
|  | 805 | clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; | 
|  | 806 | } | 
|  | 807 | clocksource_register(&clocksource_tsc); | 
|  | 808 | } | 
|  | 809 |  | 
|  | 810 | void __init tsc_init(void) | 
|  | 811 | { | 
|  | 812 | u64 lpj; | 
|  | 813 | int cpu; | 
|  | 814 |  | 
|  | 815 | if (!cpu_has_tsc) | 
|  | 816 | return; | 
|  | 817 |  | 
| Alok Kataria | e93ef94 | 2008-07-01 11:43:36 -0700 | [diff] [blame] | 818 | tsc_khz = calibrate_tsc(); | 
|  | 819 | cpu_khz = tsc_khz; | 
| Alok Kataria | 8fbbc4b | 2008-07-01 11:43:34 -0700 | [diff] [blame] | 820 |  | 
| Alok Kataria | e93ef94 | 2008-07-01 11:43:36 -0700 | [diff] [blame] | 821 | if (!tsc_khz) { | 
| Alok Kataria | 8fbbc4b | 2008-07-01 11:43:34 -0700 | [diff] [blame] | 822 | mark_tsc_unstable("could not calculate TSC khz"); | 
|  | 823 | return; | 
|  | 824 | } | 
|  | 825 |  | 
|  | 826 | #ifdef CONFIG_X86_64 | 
|  | 827 | if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && | 
|  | 828 | (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) | 
|  | 829 | cpu_khz = calibrate_cpu(); | 
|  | 830 | #endif | 
|  | 831 |  | 
| Alok Kataria | 8fbbc4b | 2008-07-01 11:43:34 -0700 | [diff] [blame] | 832 | printk("Detected %lu.%03lu MHz processor.\n", | 
|  | 833 | (unsigned long)cpu_khz / 1000, | 
|  | 834 | (unsigned long)cpu_khz % 1000); | 
|  | 835 |  | 
|  | 836 | /* | 
|  | 837 | * Secondary CPUs do not run through tsc_init(), so set up | 
|  | 838 | * all the scale factors for all CPUs, assuming the same | 
|  | 839 | * speed as the bootup CPU. (cpufreq notifiers will fix this | 
|  | 840 | * up if their speed diverges) | 
|  | 841 | */ | 
|  | 842 | for_each_possible_cpu(cpu) | 
|  | 843 | set_cyc2ns_scale(cpu_khz, cpu); | 
|  | 844 |  | 
|  | 845 | if (tsc_disabled > 0) | 
|  | 846 | return; | 
|  | 847 |  | 
|  | 848 | /* now allow native_sched_clock() to use rdtsc */ | 
|  | 849 | tsc_disabled = 0; | 
|  | 850 |  | 
| Alok Kataria | 70de9a9 | 2008-11-03 11:18:47 -0800 | [diff] [blame] | 851 | lpj = ((u64)tsc_khz * 1000); | 
|  | 852 | do_div(lpj, HZ); | 
|  | 853 | lpj_fine = lpj; | 
|  | 854 |  | 
| Alok Kataria | 8fbbc4b | 2008-07-01 11:43:34 -0700 | [diff] [blame] | 855 | use_tsc_delay(); | 
|  | 856 | /* Check and install the TSC clocksource */ | 
|  | 857 | dmi_check_system(bad_tsc_dmi_table); | 
|  | 858 |  | 
|  | 859 | if (unsynchronized_tsc()) | 
|  | 860 | mark_tsc_unstable("TSCs unsynchronized"); | 
|  | 861 |  | 
| Alok Kataria | 395628e | 2008-10-24 17:22:01 -0700 | [diff] [blame] | 862 | check_system_tsc_reliable(); | 
| Alok Kataria | 8fbbc4b | 2008-07-01 11:43:34 -0700 | [diff] [blame] | 863 | init_tsc_clocksource(); | 
|  | 864 | } | 
|  | 865 |  |