/*
 *	x86 SMP booting functions
 *
 *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
 *	Copyright 2001 Andi Kleen, SuSE Labs.
 *
 *	Much of the core SMP work is based on previous work by Thomas Radke, to
 *	whom a great many thanks are extended.
 *
 *	Thanks to Intel for making available several different Pentium,
 *	Pentium Pro and Pentium-II/Xeon MP machines.
 *	Original development of Linux SMP code supported by Caldera.
 *
 *	This code is released under the GNU General Public License version 2
 *
 *	Fixes
 *		Felix Koop		:	NR_CPUS used properly
 *		Jose Renau		:	Handle single CPU case.
 *		Alan Cox		:	By repeated request 8) - Total BogoMIP report.
 *		Greg Wright		:	Fix for kernel stacks panic.
 *		Erich Boleyn		:	MP v1.4 and additional changes.
 *		Matthias Sattler	:	Changes for 2.1 kernel map.
 *		Michel Lespinasse	:	Changes for 2.1 kernel map.
 *		Michael Chastain	:	Change trampoline.S to gnu as.
 *		Alan Cox		:	Dumb bug: 'B' step PPro's are fine
 *		Ingo Molnar		:	Added APIC timers, based on code
 *						from Jose Renau
 *		Ingo Molnar		:	various cleanups and rewrites
 *		Tigran Aivazian		:	fixed "0.00 in /proc/uptime on SMP" bug.
 *		Maciej W. Rozycki	:	Bits for genuine 82489DX APICs
 *		Andi Kleen		:	Changed for SMP boot into long mode.
 *		Rusty Russell		:	Hacked into shape for new "hotplug" boot process.
 *		Andi Kleen		:	Converted to new state machine.
 *						Various cleanups.
 *						Probably mostly hotplug CPU ready now.
 */
38
Andi Kleena8ab26f2005-04-16 15:25:19 -070039
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/config.h>
41#include <linux/init.h>
42
43#include <linux/mm.h>
44#include <linux/kernel_stat.h>
45#include <linux/smp_lock.h>
46#include <linux/irq.h>
47#include <linux/bootmem.h>
48#include <linux/thread_info.h>
49#include <linux/module.h>
50
51#include <linux/delay.h>
52#include <linux/mc146818rtc.h>
53#include <asm/mtrr.h>
54#include <asm/pgalloc.h>
55#include <asm/desc.h>
56#include <asm/kdebug.h>
57#include <asm/tlbflush.h>
58#include <asm/proto.h>
Andi Kleen75152112005-05-16 21:53:34 -070059#include <asm/nmi.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070060
/*
 * CPU bring-up code and data currently live in the plain init sections.
 * Change these two aliases for real CPU hotplug; note that other files
 * need to be fixed first too.
 */
#define __cpuinit	__init
#define __cpuinitdata	__initdata
65
Linus Torvalds1da177e2005-04-16 15:20:36 -070066/* Number of siblings per CPU package */
67int smp_num_siblings = 1;
68/* Package ID of each logical CPU */
69u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
Andi Kleen3dd9d512005-04-16 15:25:15 -070070u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
Linus Torvalds1da177e2005-04-16 15:20:36 -070071EXPORT_SYMBOL(phys_proc_id);
Andi Kleen3dd9d512005-04-16 15:25:15 -070072EXPORT_SYMBOL(cpu_core_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -070073
74/* Bitmask of currently online CPUs */
75cpumask_t cpu_online_map;
76
Andi Kleena8ab26f2005-04-16 15:25:19 -070077EXPORT_SYMBOL(cpu_online_map);
78
79/*
80 * Private maps to synchronize booting between AP and BP.
81 * Probably not needed anymore, but it makes for easier debugging. -AK
82 */
Linus Torvalds1da177e2005-04-16 15:20:36 -070083cpumask_t cpu_callin_map;
84cpumask_t cpu_callout_map;
Andi Kleena8ab26f2005-04-16 15:25:19 -070085
86cpumask_t cpu_possible_map;
87EXPORT_SYMBOL(cpu_possible_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -070088
89/* Per CPU bogomips and other parameters */
90struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
91
Andi Kleena8ab26f2005-04-16 15:25:19 -070092/* Set when the idlers are all forked */
93int smp_threads_ready;
94
Linus Torvalds1da177e2005-04-16 15:20:36 -070095cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
Andi Kleen3dd9d512005-04-16 15:25:15 -070096cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
Linus Torvalds1da177e2005-04-16 15:20:36 -070097
/*
 * The 80x86 trampoline program, exposed as a byte array:
 * trampoline_data is its first byte, trampoline_end is one past its last.
 */
extern unsigned char trampoline_data[], trampoline_end[];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104
105/*
106 * Currently trivial. Write the real->protected mode
107 * bootstrap into the page concerned. The caller
108 * has made sure it's suitably aligned.
109 */
110
Andi Kleena8ab26f2005-04-16 15:25:19 -0700111static unsigned long __cpuinit setup_trampoline(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112{
113 void *tramp = __va(SMP_TRAMPOLINE_BASE);
114 memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
115 return virt_to_phys(tramp);
116}
117
118/*
119 * The bootstrap kernel entry code has set these up. Save them for
120 * a given CPU
121 */
122
Andi Kleena8ab26f2005-04-16 15:25:19 -0700123static void __cpuinit smp_store_cpu_info(int id)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124{
125 struct cpuinfo_x86 *c = cpu_data + id;
126
127 *c = boot_cpu_data;
128 identify_cpu(c);
Andi Kleendda50e72005-05-16 21:53:25 -0700129 print_cpu_info(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700130}
131
/*
 * New Funky TSC sync algorithm borrowed from IA64.
 * Main advantage is that it doesn't reset the TSCs fully and
 * in general looks more robust and it works better than my earlier
 * attempts. I believe it was written by David Mosberger. Some minor
 * adjustments for x86-64 by me -AK
 *
 * Original comment reproduced below.
 *
 * Synchronize TSC of the current (slave) CPU with the TSC of the
 * MASTER CPU (normally the time-keeper CPU).  We use a closed loop to
 * eliminate the possibility of unaccounted-for errors (such as
 * getting a machine check in the middle of a calibration step).  The
 * basic idea is for the slave to ask the master what itc value it has
 * and to read its own itc before and after the master responds.  Each
 * iteration gives us three timestamps:
 *
 *	slave		master
 *
 *	t0 ---\
 *             ---\
 *		   --->
 *			tm
 *		   /---
 *	       /---
 *	t1 <---
 *
 *
 * The goal is to adjust the slave's TSC such that tm falls exactly
 * half-way between t0 and t1.  If we achieve this, the clocks are
 * synchronized provided the interconnect between the slave and the
 * master is symmetric.  Even if the interconnect were asymmetric, we
 * would still know that the synchronization error is smaller than the
 * roundtrip latency (t1 - t0).
 *
 * When the interconnect is quiet and symmetric, this lets us
 * synchronize the TSC to within one or two cycles.  However, we can
 * only *guarantee* that the synchronization is accurate to within a
 * round-trip time, which is typically in the range of several hundred
 * cycles (e.g., ~500 cycles).  In practice, this means that the TSCs
 * are usually almost perfectly synchronized, but we shouldn't assume
 * that the accuracy is much better than half a microsecond or so.
 *
 * [there are other errors like the latency of RDTSC and of the
 *  WRMSR. These can also amount to hundreds of cycles. So it's
 *  probably worse. It claims 153 cycles error on a dual Opteron,
 *  but I suspect the numbers are actually somewhat worse -AK]
 */
180
Andi Kleendda50e72005-05-16 21:53:25 -0700181#define MASTER 0
182#define SLAVE (SMP_CACHE_BYTES/8)
183
184/* Intentionally don't use cpu_relax() while TSC synchronization
185 because we don't want to go into funky power save modi or cause
186 hypervisors to schedule us away. Going to sleep would likely affect
187 latency and low latency is the primary objective here. -AK */
188#define no_cpu_relax() barrier()
189
Andi Kleena8ab26f2005-04-16 15:25:19 -0700190static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
Andi Kleendda50e72005-05-16 21:53:25 -0700191static volatile __cpuinitdata unsigned long go[SLAVE + 1];
192static int notscsync __cpuinitdata;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700193
Andi Kleendda50e72005-05-16 21:53:25 -0700194#undef DEBUG_TSC_SYNC
Linus Torvalds1da177e2005-04-16 15:20:36 -0700195
Andi Kleendda50e72005-05-16 21:53:25 -0700196#define NUM_ROUNDS 64 /* magic value */
197#define NUM_ITERS 5 /* likewise */
198
199/* Callback on boot CPU */
200static __cpuinit void sync_master(void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201{
Andi Kleendda50e72005-05-16 21:53:25 -0700202 unsigned long flags, i;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203
Andi Kleendda50e72005-05-16 21:53:25 -0700204 if (smp_processor_id() != boot_cpu_id)
Andi Kleena8ab26f2005-04-16 15:25:19 -0700205 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206
Andi Kleendda50e72005-05-16 21:53:25 -0700207 go[MASTER] = 0;
Andi Kleena8ab26f2005-04-16 15:25:19 -0700208
Andi Kleendda50e72005-05-16 21:53:25 -0700209 local_irq_save(flags);
210 {
211 for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
212 while (!go[MASTER])
213 no_cpu_relax();
214 go[MASTER] = 0;
215 rdtscll(go[SLAVE]);
216 }
Andi Kleena8ab26f2005-04-16 15:25:19 -0700217 }
Andi Kleendda50e72005-05-16 21:53:25 -0700218 local_irq_restore(flags);
Andi Kleena8ab26f2005-04-16 15:25:19 -0700219}
220
Andi Kleendda50e72005-05-16 21:53:25 -0700221/*
222 * Return the number of cycles by which our tsc differs from the tsc
223 * on the master (time-keeper) CPU. A positive number indicates our
224 * tsc is ahead of the master, negative that it is behind.
225 */
226static inline long
227get_delta(long *rt, long *master)
228{
229 unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
230 unsigned long tcenter, t0, t1, tm;
231 int i;
232
233 for (i = 0; i < NUM_ITERS; ++i) {
234 rdtscll(t0);
235 go[MASTER] = 1;
236 while (!(tm = go[SLAVE]))
237 no_cpu_relax();
238 go[SLAVE] = 0;
239 rdtscll(t1);
240
241 if (t1 - t0 < best_t1 - best_t0)
242 best_t0 = t0, best_t1 = t1, best_tm = tm;
243 }
244
245 *rt = best_t1 - best_t0;
246 *master = best_tm - best_t0;
247
248 /* average best_t0 and best_t1 without overflow: */
249 tcenter = (best_t0/2 + best_t1/2);
250 if (best_t0 % 2 + best_t1 % 2 == 2)
251 ++tcenter;
252 return tcenter - best_tm;
253}
254
255static __cpuinit void sync_tsc(void)
256{
257 int i, done = 0;
258 long delta, adj, adjust_latency = 0;
259 unsigned long flags, rt, master_time_stamp, bound;
260#if DEBUG_TSC_SYNC
261 static struct syncdebug {
262 long rt; /* roundtrip time */
263 long master; /* master's timestamp */
264 long diff; /* difference between midpoint and master's timestamp */
265 long lat; /* estimate of tsc adjustment latency */
266 } t[NUM_ROUNDS] __cpuinitdata;
267#endif
268
269 go[MASTER] = 1;
270
271 smp_call_function(sync_master, NULL, 1, 0);
272
273 while (go[MASTER]) /* wait for master to be ready */
274 no_cpu_relax();
275
276 spin_lock_irqsave(&tsc_sync_lock, flags);
277 {
278 for (i = 0; i < NUM_ROUNDS; ++i) {
279 delta = get_delta(&rt, &master_time_stamp);
280 if (delta == 0) {
281 done = 1; /* let's lock on to this... */
282 bound = rt;
283 }
284
285 if (!done) {
286 unsigned long t;
287 if (i > 0) {
288 adjust_latency += -delta;
289 adj = -delta + adjust_latency/4;
290 } else
291 adj = -delta;
292
293 rdtscll(t);
294 wrmsrl(MSR_IA32_TSC, t + adj);
295 }
296#if DEBUG_TSC_SYNC
297 t[i].rt = rt;
298 t[i].master = master_time_stamp;
299 t[i].diff = delta;
300 t[i].lat = adjust_latency/4;
301#endif
302 }
303 }
304 spin_unlock_irqrestore(&tsc_sync_lock, flags);
305
306#if DEBUG_TSC_SYNC
307 for (i = 0; i < NUM_ROUNDS; ++i)
308 printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
309 t[i].rt, t[i].master, t[i].diff, t[i].lat);
310#endif
311
312 printk(KERN_INFO
313 "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
314 "maxerr %lu cycles)\n",
315 smp_processor_id(), boot_cpu_id, delta, rt);
316}
317
318static void __cpuinit tsc_sync_wait(void)
319{
320 if (notscsync || !cpu_has_tsc)
321 return;
322 printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
323 boot_cpu_id);
324 sync_tsc();
325}
326
327static __init int notscsync_setup(char *s)
328{
329 notscsync = 1;
330 return 0;
331}
332__setup("notscsync", notscsync_setup);
333
Andi Kleena8ab26f2005-04-16 15:25:19 -0700334static atomic_t init_deasserted __cpuinitdata;
335
336/*
337 * Report back to the Boot Processor.
338 * Running on AP.
339 */
340void __cpuinit smp_callin(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341{
342 int cpuid, phys_id;
343 unsigned long timeout;
344
345 /*
346 * If waken up by an INIT in an 82489DX configuration
347 * we may get here before an INIT-deassert IPI reaches
348 * our local APIC. We have to wait for the IPI or we'll
349 * lock up on an APIC access.
350 */
Andi Kleena8ab26f2005-04-16 15:25:19 -0700351 while (!atomic_read(&init_deasserted))
352 cpu_relax();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353
354 /*
355 * (This works even if the APIC is not enabled.)
356 */
357 phys_id = GET_APIC_ID(apic_read(APIC_ID));
358 cpuid = smp_processor_id();
359 if (cpu_isset(cpuid, cpu_callin_map)) {
360 panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
361 phys_id, cpuid);
362 }
363 Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
364
365 /*
366 * STARTUP IPIs are fragile beasts as they might sometimes
367 * trigger some glue motherboard logic. Complete APIC bus
368 * silence for 1 second, this overestimates the time the
369 * boot CPU is spending to send the up to 2 STARTUP IPIs
370 * by a factor of two. This should be enough.
371 */
372
373 /*
374 * Waiting 2s total for startup (udelay is not yet working)
375 */
376 timeout = jiffies + 2*HZ;
377 while (time_before(jiffies, timeout)) {
378 /*
379 * Has the boot CPU finished it's STARTUP sequence?
380 */
381 if (cpu_isset(cpuid, cpu_callout_map))
382 break;
Andi Kleena8ab26f2005-04-16 15:25:19 -0700383 cpu_relax();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384 }
385
386 if (!time_before(jiffies, timeout)) {
387 panic("smp_callin: CPU%d started up but did not get a callout!\n",
388 cpuid);
389 }
390
391 /*
392 * the boot CPU has finished the init stage and is spinning
393 * on callin_map until we finish. We are free to set up this
394 * CPU, first the APIC. (this is probably redundant on most
395 * boards)
396 */
397
398 Dprintk("CALLIN, before setup_local_APIC().\n");
399 setup_local_APIC();
400
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 /*
402 * Get our bogomips.
403 */
404 calibrate_delay();
405 Dprintk("Stack at about %p\n",&cpuid);
406
407 disable_APIC_timer();
408
409 /*
410 * Save our processor parameters
411 */
412 smp_store_cpu_info(cpuid);
413
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 /*
415 * Allow the master to continue.
416 */
417 cpu_set(cpuid, cpu_callin_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418}
419
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420/*
Andi Kleena8ab26f2005-04-16 15:25:19 -0700421 * Setup code on secondary processor (after comming out of the trampoline)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422 */
Andi Kleena8ab26f2005-04-16 15:25:19 -0700423void __cpuinit start_secondary(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424{
425 /*
426 * Dont put anything before smp_callin(), SMP
427 * booting is too fragile that we want to limit the
428 * things done here to the most necessary things.
429 */
430 cpu_init();
431 smp_callin();
432
433 /* otherwise gcc will move up the smp_processor_id before the cpu_init */
434 barrier();
435
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
437 setup_secondary_APIC_clock();
438
Andi Kleena8ab26f2005-04-16 15:25:19 -0700439 Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440
441 if (nmi_watchdog == NMI_IO_APIC) {
442 disable_8259A_irq(0);
443 enable_NMI_through_LVT0(NULL);
444 enable_8259A_irq(0);
445 }
446
Andi Kleena8ab26f2005-04-16 15:25:19 -0700447 enable_APIC_timer();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448
449 /*
Andi Kleena8ab26f2005-04-16 15:25:19 -0700450 * Allow the master to continue.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452 cpu_set(smp_processor_id(), cpu_online_map);
Andi Kleena8ab26f2005-04-16 15:25:19 -0700453 mb();
454
Andi Kleendda50e72005-05-16 21:53:25 -0700455 /* Wait for TSC sync to not schedule things before.
456 We still process interrupts, which could see an inconsistent
457 time in that window unfortunately. */
458 tsc_sync_wait();
459
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460 cpu_idle();
461}
462
/*
 * Hand-off variables written by do_boot_cpu() before a secondary CPU is
 * woken: the initial stack pointer and the function to start running.
 * (Presumably read by the early startup code; defined elsewhere.)
 */
extern volatile unsigned long init_rsp;
extern void (*initial_code)(void);
465
466#if APIC_DEBUG
Andi Kleena8ab26f2005-04-16 15:25:19 -0700467static void inquire_remote_apic(int apicid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468{
469 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
470 char *names[] = { "ID", "VERSION", "SPIV" };
471 int timeout, status;
472
473 printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
474
475 for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
476 printk("... APIC #%d %s: ", apicid, names[i]);
477
478 /*
479 * Wait for idle.
480 */
481 apic_wait_icr_idle();
482
483 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
484 apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
485
486 timeout = 0;
487 do {
488 udelay(100);
489 status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
490 } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
491
492 switch (status) {
493 case APIC_ICR_RR_VALID:
494 status = apic_read(APIC_RRR);
495 printk("%08x\n", status);
496 break;
497 default:
498 printk("failed\n");
499 }
500 }
501}
502#endif
503
Andi Kleena8ab26f2005-04-16 15:25:19 -0700504/*
505 * Kick the secondary to wake up.
506 */
507static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508{
509 unsigned long send_status = 0, accept_status = 0;
510 int maxlvt, timeout, num_starts, j;
511
512 Dprintk("Asserting INIT.\n");
513
514 /*
515 * Turn INIT on target chip
516 */
517 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
518
519 /*
520 * Send IPI
521 */
522 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
523 | APIC_DM_INIT);
524
525 Dprintk("Waiting for send to finish...\n");
526 timeout = 0;
527 do {
528 Dprintk("+");
529 udelay(100);
530 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
531 } while (send_status && (timeout++ < 1000));
532
533 mdelay(10);
534
535 Dprintk("Deasserting INIT.\n");
536
537 /* Target chip */
538 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
539
540 /* Send IPI */
541 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
542
543 Dprintk("Waiting for send to finish...\n");
544 timeout = 0;
545 do {
546 Dprintk("+");
547 udelay(100);
548 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
549 } while (send_status && (timeout++ < 1000));
550
551 atomic_set(&init_deasserted, 1);
552
553 /*
554 * Should we send STARTUP IPIs ?
555 *
556 * Determine this based on the APIC version.
557 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
558 */
559 if (APIC_INTEGRATED(apic_version[phys_apicid]))
560 num_starts = 2;
561 else
562 num_starts = 0;
563
564 /*
565 * Run STARTUP IPI loop.
566 */
567 Dprintk("#startup loops: %d.\n", num_starts);
568
569 maxlvt = get_maxlvt();
570
571 for (j = 1; j <= num_starts; j++) {
572 Dprintk("Sending STARTUP #%d.\n",j);
573 apic_read_around(APIC_SPIV);
574 apic_write(APIC_ESR, 0);
575 apic_read(APIC_ESR);
576 Dprintk("After apic_write.\n");
577
578 /*
579 * STARTUP IPI
580 */
581
582 /* Target chip */
583 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
584
585 /* Boot on the stack */
586 /* Kick the second */
587 apic_write_around(APIC_ICR, APIC_DM_STARTUP
588 | (start_rip >> 12));
589
590 /*
591 * Give the other CPU some time to accept the IPI.
592 */
593 udelay(300);
594
595 Dprintk("Startup point 1.\n");
596
597 Dprintk("Waiting for send to finish...\n");
598 timeout = 0;
599 do {
600 Dprintk("+");
601 udelay(100);
602 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
603 } while (send_status && (timeout++ < 1000));
604
605 /*
606 * Give the other CPU some time to accept the IPI.
607 */
608 udelay(200);
609 /*
610 * Due to the Pentium erratum 3AP.
611 */
612 if (maxlvt > 3) {
613 apic_read_around(APIC_SPIV);
614 apic_write(APIC_ESR, 0);
615 }
616 accept_status = (apic_read(APIC_ESR) & 0xEF);
617 if (send_status || accept_status)
618 break;
619 }
620 Dprintk("After Startup.\n");
621
622 if (send_status)
623 printk(KERN_ERR "APIC never delivered???\n");
624 if (accept_status)
625 printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
626
627 return (send_status | accept_status);
628}
629
Andi Kleena8ab26f2005-04-16 15:25:19 -0700630/*
631 * Boot one CPU.
632 */
633static int __cpuinit do_boot_cpu(int cpu, int apicid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634{
635 struct task_struct *idle;
636 unsigned long boot_error;
Andi Kleena8ab26f2005-04-16 15:25:19 -0700637 int timeout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638 unsigned long start_rip;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639 /*
640 * We can't use kernel_thread since we must avoid to
641 * reschedule the child.
642 */
643 idle = fork_idle(cpu);
Andi Kleena8ab26f2005-04-16 15:25:19 -0700644 if (IS_ERR(idle)) {
645 printk("failed fork for CPU %d\n", cpu);
646 return PTR_ERR(idle);
647 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648
649 cpu_pda[cpu].pcurrent = idle;
650
651 start_rip = setup_trampoline();
652
Andi Kleena8ab26f2005-04-16 15:25:19 -0700653 init_rsp = idle->thread.rsp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 per_cpu(init_tss,cpu).rsp0 = init_rsp;
655 initial_code = start_secondary;
656 clear_ti_thread_flag(idle->thread_info, TIF_FORK);
657
Andi Kleena8ab26f2005-04-16 15:25:19 -0700658 printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659 start_rip, init_rsp);
660
661 /*
662 * This grunge runs the startup process for
663 * the targeted processor.
664 */
665
666 atomic_set(&init_deasserted, 0);
667
668 Dprintk("Setting warm reset code and vector.\n");
669
670 CMOS_WRITE(0xa, 0xf);
671 local_flush_tlb();
672 Dprintk("1.\n");
673 *((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4;
674 Dprintk("2.\n");
675 *((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf;
676 Dprintk("3.\n");
677
678 /*
679 * Be paranoid about clearing APIC errors.
680 */
681 if (APIC_INTEGRATED(apic_version[apicid])) {
682 apic_read_around(APIC_SPIV);
683 apic_write(APIC_ESR, 0);
684 apic_read(APIC_ESR);
685 }
686
687 /*
688 * Status is now clean
689 */
690 boot_error = 0;
691
692 /*
693 * Starting actual IPI sequence...
694 */
Andi Kleena8ab26f2005-04-16 15:25:19 -0700695 boot_error = wakeup_secondary_via_INIT(apicid, start_rip);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696
697 if (!boot_error) {
698 /*
699 * allow APs to start initializing.
700 */
701 Dprintk("Before Callout %d.\n", cpu);
702 cpu_set(cpu, cpu_callout_map);
703 Dprintk("After Callout %d.\n", cpu);
704
705 /*
706 * Wait 5s total for a response
707 */
708 for (timeout = 0; timeout < 50000; timeout++) {
709 if (cpu_isset(cpu, cpu_callin_map))
710 break; /* It has booted */
711 udelay(100);
712 }
713
714 if (cpu_isset(cpu, cpu_callin_map)) {
715 /* number CPUs logically, starting from 1 (BSP is 0) */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716 Dprintk("CPU has booted.\n");
717 } else {
718 boot_error = 1;
719 if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE))
720 == 0xA5)
721 /* trampoline started but...? */
722 printk("Stuck ??\n");
723 else
724 /* trampoline code not run */
725 printk("Not responding.\n");
726#if APIC_DEBUG
727 inquire_remote_apic(apicid);
728#endif
729 }
730 }
731 if (boot_error) {
732 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
733 clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
Andi Kleena8ab26f2005-04-16 15:25:19 -0700734 cpu_clear(cpu, cpu_present_map);
735 cpu_clear(cpu, cpu_possible_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736 x86_cpu_to_apicid[cpu] = BAD_APICID;
737 x86_cpu_to_log_apicid[cpu] = BAD_APICID;
Andi Kleena8ab26f2005-04-16 15:25:19 -0700738 return -EIO;
739 }
740
741 return 0;
742}
743
744cycles_t cacheflush_time;
745unsigned long cache_decay_ticks;
746
747/*
748 * Construct cpu_sibling_map[], so that we can tell the sibling CPU
749 * on SMT systems efficiently.
750 */
751static __cpuinit void detect_siblings(void)
752{
753 int cpu;
754
755 for (cpu = 0; cpu < NR_CPUS; cpu++) {
756 cpus_clear(cpu_sibling_map[cpu]);
757 cpus_clear(cpu_core_map[cpu]);
758 }
759
760 for_each_online_cpu (cpu) {
761 struct cpuinfo_x86 *c = cpu_data + cpu;
762 int siblings = 0;
763 int i;
764 if (smp_num_siblings > 1) {
765 for_each_online_cpu (i) {
Siddha, Suresh Bd31ddaa2005-04-16 15:25:20 -0700766 if (cpu_core_id[cpu] == cpu_core_id[i]) {
Andi Kleena8ab26f2005-04-16 15:25:19 -0700767 siblings++;
768 cpu_set(i, cpu_sibling_map[cpu]);
769 }
770 }
771 } else {
772 siblings++;
773 cpu_set(cpu, cpu_sibling_map[cpu]);
774 }
775
776 if (siblings != smp_num_siblings) {
777 printk(KERN_WARNING
778 "WARNING: %d siblings found for CPU%d, should be %d\n",
779 siblings, cpu, smp_num_siblings);
780 smp_num_siblings = siblings;
781 }
782 if (c->x86_num_cores > 1) {
783 for_each_online_cpu(i) {
784 if (phys_proc_id[cpu] == phys_proc_id[i])
785 cpu_set(i, cpu_core_map[cpu]);
786 }
787 } else
788 cpu_core_map[cpu] = cpu_sibling_map[cpu];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789 }
790}
791
Andi Kleena8ab26f2005-04-16 15:25:19 -0700792/*
793 * Cleanup possible dangling ends...
794 */
795static __cpuinit void smp_cleanup_boot(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700796{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797 /*
Andi Kleena8ab26f2005-04-16 15:25:19 -0700798 * Paranoid: Set warm reset code and vector here back
799 * to default values.
800 */
801 CMOS_WRITE(0, 0xf);
802
803 /*
804 * Reset trampoline flag
805 */
806 *((volatile int *) phys_to_virt(0x467)) = 0;
807
808#ifndef CONFIG_HOTPLUG_CPU
809 /*
810 * Free pages reserved for SMP bootup.
811 * When you add hotplug CPU support later remove this
812 * Note there is more work to be done for later CPU bootup.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813 */
814
Andi Kleena8ab26f2005-04-16 15:25:19 -0700815 free_page((unsigned long) __va(PAGE_SIZE));
816 free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
817#endif
818}
819
820/*
821 * Fall back to non SMP mode after errors.
822 *
823 * RED-PEN audit/test this more. I bet there is more state messed up here.
824 */
825static __cpuinit void disable_smp(void)
826{
827 cpu_present_map = cpumask_of_cpu(0);
828 cpu_possible_map = cpumask_of_cpu(0);
829 if (smp_found_config)
830 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
831 else
832 phys_cpu_present_map = physid_mask_of_physid(0);
833 cpu_set(0, cpu_sibling_map[0]);
834 cpu_set(0, cpu_core_map[0]);
835}
836
837/*
838 * Handle user cpus=... parameter.
839 */
840static __cpuinit void enforce_max_cpus(unsigned max_cpus)
841{
842 int i, k;
843 k = 0;
844 for (i = 0; i < NR_CPUS; i++) {
845 if (!cpu_possible(i))
846 continue;
847 if (++k > max_cpus) {
848 cpu_clear(i, cpu_possible_map);
849 cpu_clear(i, cpu_present_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850 }
851 }
852}
853
854/*
Andi Kleena8ab26f2005-04-16 15:25:19 -0700855 * Various sanity checks.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856 */
Andi Kleena8ab26f2005-04-16 15:25:19 -0700857static int __cpuinit smp_sanity_check(unsigned max_cpus)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700859 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
860 printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
861 hard_smp_processor_id());
862 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
863 }
864
865 /*
866 * If we couldn't find an SMP configuration at boot time,
867 * get out of here now!
868 */
869 if (!smp_found_config) {
870 printk(KERN_NOTICE "SMP motherboard not detected.\n");
Andi Kleena8ab26f2005-04-16 15:25:19 -0700871 disable_smp();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872 if (APIC_init_uniprocessor())
873 printk(KERN_NOTICE "Local APIC not detected."
874 " Using dummy APIC emulation.\n");
Andi Kleena8ab26f2005-04-16 15:25:19 -0700875 return -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876 }
877
878 /*
879 * Should not be necessary because the MP table should list the boot
880 * CPU too, but we do it for the sake of robustness anyway.
881 */
882 if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) {
883 printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n",
884 boot_cpu_id);
885 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
886 }
887
888 /*
889 * If we couldn't find a local APIC, then get out of here now!
890 */
891 if (APIC_INTEGRATED(apic_version[boot_cpu_id]) && !cpu_has_apic) {
892 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
893 boot_cpu_id);
894 printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
Andi Kleena8ab26f2005-04-16 15:25:19 -0700895 nr_ioapics = 0;
896 return -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897 }
898
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899 /*
900 * If SMP should be disabled, then really disable it!
901 */
902 if (!max_cpus) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903 printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
Andi Kleena8ab26f2005-04-16 15:25:19 -0700904 nr_ioapics = 0;
905 return -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906 }
907
Andi Kleena8ab26f2005-04-16 15:25:19 -0700908 return 0;
909}
910
911/*
912 * Prepare for SMP bootup. The MP table or ACPI has been read
913 * earlier. Just do some sanity checking here and enable APIC mode.
914 */
915void __cpuinit smp_prepare_cpus(unsigned int max_cpus)
916{
917 int i;
918
919 nmi_watchdog_default();
920 current_cpu_data = boot_cpu_data;
921 current_thread_info()->cpu = 0; /* needed? */
922
923 enforce_max_cpus(max_cpus);
924
925 /*
926 * Fill in cpu_present_mask
927 */
928 for (i = 0; i < NR_CPUS; i++) {
929 int apicid = cpu_present_to_apicid(i);
930 if (physid_isset(apicid, phys_cpu_present_map)) {
931 cpu_set(i, cpu_present_map);
932 /* possible map would be different if we supported real
933 CPU hotplug. */
934 cpu_set(i, cpu_possible_map);
935 }
936 }
937
938 if (smp_sanity_check(max_cpus) < 0) {
939 printk(KERN_INFO "SMP disabled\n");
940 disable_smp();
941 return;
942 }
943
944
945 /*
946 * Switch from PIC to APIC mode.
947 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948 connect_bsp_APIC();
949 setup_local_APIC();
950
Andi Kleena8ab26f2005-04-16 15:25:19 -0700951 if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
952 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
953 GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
954 /* Or can we switch back to PIC here? */
955 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956
957 /*
Andi Kleena8ab26f2005-04-16 15:25:19 -0700958 * Now start the IO-APICs
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959 */
960 if (!skip_ioapic_setup && nr_ioapics)
961 setup_IO_APIC();
962 else
963 nr_ioapics = 0;
964
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965 /*
Andi Kleena8ab26f2005-04-16 15:25:19 -0700966 * Set up local APIC timer on boot CPU.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968
Andi Kleena8ab26f2005-04-16 15:25:19 -0700969 setup_boot_APIC_clock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970}
971
Andi Kleena8ab26f2005-04-16 15:25:19 -0700972/*
973 * Early setup to make printk work.
974 */
975void __init smp_prepare_boot_cpu(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976{
Andi Kleena8ab26f2005-04-16 15:25:19 -0700977 int me = smp_processor_id();
978 cpu_set(me, cpu_online_map);
979 cpu_set(me, cpu_callout_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980}
981
Andi Kleena8ab26f2005-04-16 15:25:19 -0700982/*
983 * Entry point to boot a CPU.
984 *
985 * This is all __cpuinit, not __devinit for now because we don't support
986 * CPU hotplug (yet).
987 */
988int __cpuinit __cpu_up(unsigned int cpu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989{
Andi Kleena8ab26f2005-04-16 15:25:19 -0700990 int err;
991 int apicid = cpu_present_to_apicid(cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992
Andi Kleena8ab26f2005-04-16 15:25:19 -0700993 WARN_ON(irqs_disabled());
994
995 Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);
996
997 if (apicid == BAD_APICID || apicid == boot_cpu_id ||
998 !physid_isset(apicid, phys_cpu_present_map)) {
999 printk("__cpu_up: bad cpu %d\n", cpu);
1000 return -EINVAL;
1001 }
Andi Kleena8ab26f2005-04-16 15:25:19 -07001002
1003 /* Boot it! */
1004 err = do_boot_cpu(cpu, apicid);
1005 if (err < 0) {
Andi Kleena8ab26f2005-04-16 15:25:19 -07001006 Dprintk("do_boot_cpu failed %d\n", err);
1007 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008 }
1009
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010 /* Unleash the CPU! */
1011 Dprintk("waiting for cpu %d\n", cpu);
1012
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013 while (!cpu_isset(cpu, cpu_online_map))
Andi Kleena8ab26f2005-04-16 15:25:19 -07001014 cpu_relax();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015 return 0;
1016}
1017
Andi Kleena8ab26f2005-04-16 15:25:19 -07001018/*
1019 * Finish the SMP boot.
1020 */
1021void __cpuinit smp_cpus_done(unsigned int max_cpus)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022{
Andi Kleena8ab26f2005-04-16 15:25:19 -07001023 zap_low_mappings();
1024 smp_cleanup_boot();
1025
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026#ifdef CONFIG_X86_IO_APIC
1027 setup_ioapic_dest();
1028#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029
Andi Kleena8ab26f2005-04-16 15:25:19 -07001030 detect_siblings();
1031 time_init_gtod();
Andi Kleen75152112005-05-16 21:53:34 -07001032
1033 check_nmi_watchdog();
Andi Kleena8ab26f2005-04-16 15:25:19 -07001034}