/*
 * x86 SMP booting functions
 *
 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
 * Copyright 2001 Andi Kleen, SuSE Labs.
 *
 * Much of the core SMP work is based on previous work by Thomas Radke, to
 * whom a great many thanks are extended.
 *
 * Thanks to Intel for making available several different Pentium,
 * Pentium Pro and Pentium-II/Xeon MP machines.
 * Original development of Linux SMP code supported by Caldera.
 *
 * This code is released under the GNU General Public License version 2
 *
 * Fixes
 *	Felix Koop	:	NR_CPUS used properly
 *	Jose Renau	:	Handle single CPU case.
 *	Alan Cox	:	By repeated request 8) - Total BogoMIP report.
 *	Greg Wright	:	Fix for kernel stacks panic.
 *	Erich Boleyn	:	MP v1.4 and additional changes.
 *	Matthias Sattler:	Changes for 2.1 kernel map.
 *	Michel Lespinasse:	Changes for 2.1 kernel map.
 *	Michael Chastain:	Change trampoline.S to gnu as.
 *	Alan Cox	:	Dumb bug: 'B' step PPro's are fine
 *	Ingo Molnar	:	Added APIC timers, based on code
 *				from Jose Renau
 *	Ingo Molnar	:	various cleanups and rewrites
 *	Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
 *	Maciej W. Rozycki:	Bits for genuine 82489DX APICs
 *	Andi Kleen	:	Changed for SMP boot into long mode.
 *	Rusty Russell	:	Hacked into shape for new "hotplug" boot process.
 *	Andi Kleen	:	Converted to new state machine.
 *				Various cleanups.
 *				Probably mostly hotplug CPU ready now.
 */


#include <linux/config.h>
#include <linux/init.h>

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/smp_lock.h>
#include <linux/irq.h>
#include <linux/bootmem.h>
#include <linux/thread_info.h>
#include <linux/module.h>

#include <linux/delay.h>
#include <linux/mc146818rtc.h>
#include <asm/mtrr.h>
#include <asm/pgalloc.h>
#include <asm/desc.h>
#include <asm/kdebug.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm/nmi.h>

/* Change for real CPU hotplug. Note other files need to be fixed
   first too. */
#define __cpuinit __init
#define __cpuinitdata __initdata

/* Number of siblings per CPU package */
int smp_num_siblings = 1;
/* Package ID of each logical CPU */
u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
EXPORT_SYMBOL(phys_proc_id);
EXPORT_SYMBOL(cpu_core_id);

/* Bitmask of currently online CPUs */
cpumask_t cpu_online_map;

EXPORT_SYMBOL(cpu_online_map);

/*
 * Private maps to synchronize booting between AP and BP.
 * Probably not needed anymore, but it makes for easier debugging. -AK
 */
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;

cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);

/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;

/* Set when the idlers are all forked */
int smp_threads_ready;

cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_core_map);

/*
 * Trampoline 80x86 program as an array.
 */

extern unsigned char trampoline_data[];
extern unsigned char trampoline_end[];

/*
 * Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller
 * has made sure it's suitably aligned.
 */

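/*
 * Note that the trampoline page has to sit in real-mode addressable
 * memory below 1MB, because the STARTUP IPI can only carry a page
 * number as the entry vector (see start_rip >> 12 in
 * wakeup_secondary_via_INIT()).
 */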
static unsigned long __cpuinit setup_trampoline(void)
{
	void *tramp = __va(SMP_TRAMPOLINE_BASE);
	memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
	return virt_to_phys(tramp);
}

/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU
 */

static void __cpuinit smp_store_cpu_info(int id)
{
	struct cpuinfo_x86 *c = cpu_data + id;

	*c = boot_cpu_data;
	identify_cpu(c);
	print_cpu_info(c);
}

/*
 * New Funky TSC sync algorithm borrowed from IA64.
 * Main advantage is that it doesn't reset the TSCs fully and
 * in general looks more robust and it works better than my earlier
 * attempts. I believe it was written by David Mosberger. Some minor
 * adjustments for x86-64 by me -AK
 *
 * Original comment reproduced below.
 *
 * Synchronize TSC of the current (slave) CPU with the TSC of the
 * MASTER CPU (normally the time-keeper CPU).  We use a closed loop to
 * eliminate the possibility of unaccounted-for errors (such as
 * getting a machine check in the middle of a calibration step).  The
 * basic idea is for the slave to ask the master what itc value it has
 * and to read its own itc before and after the master responds.  Each
 * iteration gives us three timestamps:
 *
 *	slave		master
 *
 *	t0 ---\
 *	       ---\
 *		   --->
 *			tm
 *		   /---
 *	       /---
 *	t1 <---
 *
 *
 * The goal is to adjust the slave's TSC such that tm falls exactly
 * half-way between t0 and t1.  If we achieve this, the clocks are
 * synchronized provided the interconnect between the slave and the
 * master is symmetric.  Even if the interconnect were asymmetric, we
 * would still know that the synchronization error is smaller than the
 * roundtrip latency (t1 - t0).
 *
 * When the interconnect is quiet and symmetric, this lets us
 * synchronize the TSC to within one or two cycles.  However, we can
 * only *guarantee* that the synchronization is accurate to within a
 * round-trip time, which is typically in the range of several hundred
 * cycles (e.g., ~500 cycles).  In practice, this means that the TSCs
 * are usually almost perfectly synchronized, but we shouldn't assume
 * that the accuracy is much better than half a micro second or so.
 *
 * [there are other errors like the latency of RDTSC and of the
 * WRMSR. These can also account to hundreds of cycles. So it's
 * probably worse. It claims 153 cycles error on a dual Opteron,
 * but I suspect the numbers are actually somewhat worse -AK]
 */

#define MASTER	0
#define SLAVE	(SMP_CACHE_BYTES/8)

/* Intentionally don't use cpu_relax() while TSC synchronization
   because we don't want to go into funky power save modi or cause
   hypervisors to schedule us away.  Going to sleep would likely affect
   latency and low latency is the primary objective here. -AK */
#define no_cpu_relax() barrier()

static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
static volatile __cpuinitdata unsigned long go[SLAVE + 1];
static int notscsync __cpuinitdata;

#undef DEBUG_TSC_SYNC

#define NUM_ROUNDS	64	/* magic value */
#define NUM_ITERS	5	/* likewise */

/* Callback on boot CPU */
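/*
 * Handshake with get_delta() on the slave: the slave raises go[MASTER]
 * to request a sample, and this callback answers by storing the boot
 * CPU's TSC in go[SLAVE].  Runs with interrupts off to keep the
 * response latency small.
 */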
static __cpuinit void sync_master(void *arg)
{
	unsigned long flags, i;

	if (smp_processor_id() != boot_cpu_id)
		return;

	go[MASTER] = 0;

	local_irq_save(flags);
	{
		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
			while (!go[MASTER])
				no_cpu_relax();
			go[MASTER] = 0;
			rdtscll(go[SLAVE]);
		}
	}
	local_irq_restore(flags);
}

/*
 * Return the number of cycles by which our tsc differs from the tsc
 * on the master (time-keeper) CPU.  A positive number indicates our
 * tsc is ahead of the master, negative that it is behind.
 */
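/*
 * Each iteration timestamps the request (t0) and the reply (t1) and
 * keeps the sample with the smallest round trip, which has the least
 * queueing noise.  The value returned is the midpoint of t0 and t1
 * (computed without overflow) minus the master's timestamp.
 */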
static inline long
get_delta(long *rt, long *master)
{
	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
	unsigned long tcenter, t0, t1, tm;
	int i;

	for (i = 0; i < NUM_ITERS; ++i) {
		rdtscll(t0);
		go[MASTER] = 1;
		while (!(tm = go[SLAVE]))
			no_cpu_relax();
		go[SLAVE] = 0;
		rdtscll(t1);

		if (t1 - t0 < best_t1 - best_t0)
			best_t0 = t0, best_t1 = t1, best_tm = tm;
	}

	*rt = best_t1 - best_t0;
	*master = best_tm - best_t0;

	/* average best_t0 and best_t1 without overflow: */
	tcenter = (best_t0/2 + best_t1/2);
	if (best_t0 % 2 + best_t1 % 2 == 2)
		++tcenter;
	return tcenter - best_tm;
}

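/*
 * Slave side of the synchronization.  Each round measures the offset
 * with get_delta() and writes a corrected value into the TSC MSR;
 * adjust_latency/4 is carried over from the IA64 version as a running
 * estimate of the latency of the adjustment itself.
 */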
static __cpuinit void sync_tsc(void)
{
	int i, done = 0;
	long delta, adj, adjust_latency = 0;
	unsigned long flags, rt, master_time_stamp, bound;
#if DEBUG_TSC_SYNC
	static struct syncdebug {
		long rt;	/* roundtrip time */
		long master;	/* master's timestamp */
		long diff;	/* difference between midpoint and master's timestamp */
		long lat;	/* estimate of tsc adjustment latency */
	} t[NUM_ROUNDS] __cpuinitdata;
#endif

	go[MASTER] = 1;

	smp_call_function(sync_master, NULL, 1, 0);

	while (go[MASTER])	/* wait for master to be ready */
		no_cpu_relax();

	spin_lock_irqsave(&tsc_sync_lock, flags);
	{
		for (i = 0; i < NUM_ROUNDS; ++i) {
			delta = get_delta(&rt, &master_time_stamp);
			if (delta == 0) {
				done = 1;	/* let's lock on to this... */
				bound = rt;
			}

			if (!done) {
				unsigned long t;
				if (i > 0) {
					adjust_latency += -delta;
					adj = -delta + adjust_latency/4;
				} else
					adj = -delta;

				rdtscll(t);
				wrmsrl(MSR_IA32_TSC, t + adj);
			}
#if DEBUG_TSC_SYNC
			t[i].rt = rt;
			t[i].master = master_time_stamp;
			t[i].diff = delta;
			t[i].lat = adjust_latency/4;
#endif
		}
	}
	spin_unlock_irqrestore(&tsc_sync_lock, flags);

#if DEBUG_TSC_SYNC
	for (i = 0; i < NUM_ROUNDS; ++i)
		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
#endif

	printk(KERN_INFO
	       "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
	       "maxerr %lu cycles)\n",
	       smp_processor_id(), boot_cpu_id, delta, rt);
}

static void __cpuinit tsc_sync_wait(void)
{
	if (notscsync || !cpu_has_tsc)
		return;
	printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
			boot_cpu_id);
	sync_tsc();
}

static __init int notscsync_setup(char *s)
{
	notscsync = 1;
	return 0;
}
__setup("notscsync", notscsync_setup);
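/* Booting with "notscsync" on the kernel command line skips the TSC
   synchronization above. */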

static atomic_t init_deasserted __cpuinitdata;

/*
 * Report back to the Boot Processor.
 * Running on AP.
 */
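/*
 * Handshake with do_boot_cpu(): the BP sets our bit in cpu_callout_map
 * once the INIT/STARTUP sequence has been sent, and we acknowledge by
 * setting our bit in cpu_callin_map at the end of this function.
 */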
void __cpuinit smp_callin(void)
{
	int cpuid, phys_id;
	unsigned long timeout;

	/*
	 * If waken up by an INIT in an 82489DX configuration
	 * we may get here before an INIT-deassert IPI reaches
	 * our local APIC.  We have to wait for the IPI or we'll
	 * lock up on an APIC access.
	 */
	while (!atomic_read(&init_deasserted))
		cpu_relax();

	/*
	 * (This works even if the APIC is not enabled.)
	 */
	phys_id = GET_APIC_ID(apic_read(APIC_ID));
	cpuid = smp_processor_id();
	if (cpu_isset(cpuid, cpu_callin_map)) {
		panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
					phys_id, cpuid);
	}
	Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);

	/*
	 * STARTUP IPIs are fragile beasts as they might sometimes
	 * trigger some glue motherboard logic. Complete APIC bus
	 * silence for 1 second, this overestimates the time the
	 * boot CPU is spending to send the up to 2 STARTUP IPIs
	 * by a factor of two. This should be enough.
	 */

	/*
	 * Waiting 2s total for startup (udelay is not yet working)
	 */
	timeout = jiffies + 2*HZ;
	while (time_before(jiffies, timeout)) {
		/*
		 * Has the boot CPU finished its STARTUP sequence?
		 */
		if (cpu_isset(cpuid, cpu_callout_map))
			break;
		cpu_relax();
	}

	if (!time_before(jiffies, timeout)) {
		panic("smp_callin: CPU%d started up but did not get a callout!\n",
			cpuid);
	}

	/*
	 * the boot CPU has finished the init stage and is spinning
	 * on callin_map until we finish. We are free to set up this
	 * CPU, first the APIC. (this is probably redundant on most
	 * boards)
	 */

	Dprintk("CALLIN, before setup_local_APIC().\n");
	setup_local_APIC();

	/*
	 * Get our bogomips.
	 */
	calibrate_delay();
	Dprintk("Stack at about %p\n",&cpuid);

	disable_APIC_timer();

	/*
	 * Save our processor parameters
	 */
	smp_store_cpu_info(cpuid);

	/*
	 * Allow the master to continue.
	 */
	cpu_set(cpuid, cpu_callin_map);
}

/*
 * Setup code on secondary processor (after coming out of the trampoline)
 */
void __cpuinit start_secondary(void)
{
	/*
	 * Don't put anything before smp_callin(); SMP booting is
	 * fragile enough that we want to limit the things done here
	 * to the most necessary things.
	 */
	cpu_init();
	smp_callin();

	/* otherwise gcc will move up the smp_processor_id before the cpu_init */
	barrier();

	Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
	setup_secondary_APIC_clock();

	Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());

	if (nmi_watchdog == NMI_IO_APIC) {
		disable_8259A_irq(0);
		enable_NMI_through_LVT0(NULL);
		enable_8259A_irq(0);
	}

	enable_APIC_timer();

	/*
	 * Allow the master to continue.
	 */
	cpu_set(smp_processor_id(), cpu_online_map);
	mb();

	/* Wait for the TSC sync here so that nothing gets scheduled before
	   the TSCs agree.  We still process interrupts, which could see an
	   inconsistent time in that window, unfortunately. */
	tsc_sync_wait();

	cpu_idle();
}

extern volatile unsigned long init_rsp;
extern void (*initial_code)(void);

#if APIC_DEBUG
static void inquire_remote_apic(int apicid)
{
	unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
	char *names[] = { "ID", "VERSION", "SPIV" };
	int timeout, status;

	printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);

	for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
		printk("... APIC #%d %s: ", apicid, names[i]);

		/*
		 * Wait for idle.
		 */
		apic_wait_icr_idle();

		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
		apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);

		timeout = 0;
		do {
			udelay(100);
			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

		switch (status) {
		case APIC_ICR_RR_VALID:
			status = apic_read(APIC_RRR);
			printk("%08x\n", status);
			break;
		default:
			printk("failed\n");
		}
	}
}
#endif

/*
 * Kick the secondary to wake up.
 */
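/*
 * Standard INIT-SIPI-SIPI dance: assert then deassert INIT, and (only
 * on integrated APICs) send up to two STARTUP IPIs whose vector field
 * is the physical page number of the trampoline (start_rip >> 12).
 */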
static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
{
	unsigned long send_status = 0, accept_status = 0;
	int maxlvt, timeout, num_starts, j;

	Dprintk("Asserting INIT.\n");

	/*
	 * Turn INIT on target chip
	 */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/*
	 * Send IPI
	 */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
				| APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	mdelay(10);

	Dprintk("Deasserting INIT.\n");

	/* Target chip */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/* Send IPI */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	atomic_set(&init_deasserted, 1);

	/*
	 * Should we send STARTUP IPIs ?
	 *
	 * Determine this based on the APIC version.
	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid]))
		num_starts = 2;
	else
		num_starts = 0;

	/*
	 * Run STARTUP IPI loop.
	 */
	Dprintk("#startup loops: %d.\n", num_starts);

	maxlvt = get_maxlvt();

	for (j = 1; j <= num_starts; j++) {
		Dprintk("Sending STARTUP #%d.\n",j);
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
		Dprintk("After apic_write.\n");

		/*
		 * STARTUP IPI
		 */

		/* Target chip */
		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

		/* Boot on the stack */
		/* Kick the second */
		apic_write_around(APIC_ICR, APIC_DM_STARTUP
					| (start_rip >> 12));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(300);

		Dprintk("Startup point 1.\n");

		Dprintk("Waiting for send to finish...\n");
		timeout = 0;
		do {
			Dprintk("+");
			udelay(100);
			send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
		} while (send_status && (timeout++ < 1000));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(200);
		/*
		 * Due to the Pentium erratum 3AP.
		 */
		if (maxlvt > 3) {
			apic_read_around(APIC_SPIV);
			apic_write(APIC_ESR, 0);
		}
		accept_status = (apic_read(APIC_ESR) & 0xEF);
		if (send_status || accept_status)
			break;
	}
	Dprintk("After Startup.\n");

	if (send_status)
		printk(KERN_ERR "APIC never delivered???\n");
	if (accept_status)
		printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}

/*
 * Boot one CPU.
 */
static int __cpuinit do_boot_cpu(int cpu, int apicid)
{
	struct task_struct *idle;
	unsigned long boot_error;
	int timeout;
	unsigned long start_rip;
	/*
	 * We can't use kernel_thread since we must avoid
	 * rescheduling the child.
	 */
	idle = fork_idle(cpu);
	if (IS_ERR(idle)) {
		printk("failed fork for CPU %d\n", cpu);
		return PTR_ERR(idle);
	}

	cpu_pda[cpu].pcurrent = idle;

	start_rip = setup_trampoline();

	init_rsp = idle->thread.rsp;
	per_cpu(init_tss,cpu).rsp0 = init_rsp;
	initial_code = start_secondary;
	clear_ti_thread_flag(idle->thread_info, TIF_FORK);

	printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid,
	       start_rip, init_rsp);

	/*
	 * This grunge runs the startup process for
	 * the targeted processor.
	 */

	atomic_set(&init_deasserted, 0);

	Dprintk("Setting warm reset code and vector.\n");

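	/*
	 * Warm-reset path: CMOS shutdown code 0xa tells the BIOS to jump
	 * through the vector at 0x467 (offset) / 0x469 (segment), which we
	 * point at the trampoline.  This is the path that gets a CPU
	 * without an integrated APIC (no STARTUP IPIs) into the boot code.
	 */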
	CMOS_WRITE(0xa, 0xf);
	local_flush_tlb();
	Dprintk("1.\n");
	*((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4;
	Dprintk("2.\n");
	*((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf;
	Dprintk("3.\n");

	/*
	 * Be paranoid about clearing APIC errors.
	 */
	if (APIC_INTEGRATED(apic_version[apicid])) {
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}

	/*
	 * Status is now clean
	 */
	boot_error = 0;

	/*
	 * Starting actual IPI sequence...
	 */
	boot_error = wakeup_secondary_via_INIT(apicid, start_rip);

	if (!boot_error) {
		/*
		 * allow APs to start initializing.
		 */
		Dprintk("Before Callout %d.\n", cpu);
		cpu_set(cpu, cpu_callout_map);
		Dprintk("After Callout %d.\n", cpu);

		/*
		 * Wait 5s total for a response
		 */
		for (timeout = 0; timeout < 50000; timeout++) {
			if (cpu_isset(cpu, cpu_callin_map))
				break;	/* It has booted */
			udelay(100);
		}

		if (cpu_isset(cpu, cpu_callin_map)) {
			/* number CPUs logically, starting from 1 (BSP is 0) */
			Dprintk("CPU has booted.\n");
		} else {
			boot_error = 1;
			if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE))
					== 0xA5)
				/* trampoline started but...? */
				printk("Stuck ??\n");
			else
				/* trampoline code not run */
				printk("Not responding.\n");
#if APIC_DEBUG
			inquire_remote_apic(apicid);
#endif
		}
	}
	if (boot_error) {
		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
		clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
		cpu_clear(cpu, cpu_present_map);
		cpu_clear(cpu, cpu_possible_map);
		x86_cpu_to_apicid[cpu] = BAD_APICID;
		x86_cpu_to_log_apicid[cpu] = BAD_APICID;
		return -EIO;
	}

	return 0;
}

cycles_t cacheflush_time;
unsigned long cache_decay_ticks;

/*
 * Construct cpu_sibling_map[], so that we can tell the sibling CPU
 * on SMT systems efficiently.
 */
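/*
 * cpu_sibling_map groups hyperthreads that share a core (matching
 * cpu_core_id), while cpu_core_map groups cores that share a physical
 * package (matching phys_proc_id).
 */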
static __cpuinit void detect_siblings(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		cpus_clear(cpu_sibling_map[cpu]);
		cpus_clear(cpu_core_map[cpu]);
	}

	for_each_online_cpu (cpu) {
		struct cpuinfo_x86 *c = cpu_data + cpu;
		int siblings = 0;
		int i;
		if (smp_num_siblings > 1) {
			for_each_online_cpu (i) {
				if (cpu_core_id[cpu] == cpu_core_id[i]) {
					siblings++;
					cpu_set(i, cpu_sibling_map[cpu]);
				}
			}
		} else {
			siblings++;
			cpu_set(cpu, cpu_sibling_map[cpu]);
		}

		if (siblings != smp_num_siblings) {
			printk(KERN_WARNING
			       "WARNING: %d siblings found for CPU%d, should be %d\n",
			       siblings, cpu, smp_num_siblings);
			smp_num_siblings = siblings;
		}
		if (c->x86_num_cores > 1) {
			for_each_online_cpu(i) {
				if (phys_proc_id[cpu] == phys_proc_id[i])
					cpu_set(i, cpu_core_map[cpu]);
			}
		} else
			cpu_core_map[cpu] = cpu_sibling_map[cpu];
	}
}

/*
 * Cleanup possible dangling ends...
 */
static __cpuinit void smp_cleanup_boot(void)
{
	/*
	 * Paranoid:  Set warm reset code and vector here back
	 * to default values.
	 */
	CMOS_WRITE(0, 0xf);

	/*
	 * Reset trampoline flag
	 */
	*((volatile int *) phys_to_virt(0x467)) = 0;

#ifndef CONFIG_HOTPLUG_CPU
	/*
	 * Free pages reserved for SMP bootup.
	 * Remove this when hotplug CPU support is added later.
	 * Note there is more work to be done for later CPU bootup.
	 */

	free_page((unsigned long) __va(PAGE_SIZE));
	free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
#endif
}

/*
 * Fall back to non SMP mode after errors.
 *
 * RED-PEN audit/test this more. I bet there is more state messed up here.
 */
static __cpuinit void disable_smp(void)
{
	cpu_present_map = cpumask_of_cpu(0);
	cpu_possible_map = cpumask_of_cpu(0);
	if (smp_found_config)
		phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
	else
		phys_cpu_present_map = physid_mask_of_physid(0);
	cpu_set(0, cpu_sibling_map[0]);
	cpu_set(0, cpu_core_map[0]);
}

/*
 * Handle user cpus=... parameter.
 */
static __cpuinit void enforce_max_cpus(unsigned max_cpus)
{
	int i, k;
	k = 0;
	for (i = 0; i < NR_CPUS; i++) {
		if (!cpu_possible(i))
			continue;
		if (++k > max_cpus) {
			cpu_clear(i, cpu_possible_map);
			cpu_clear(i, cpu_present_map);
		}
	}
}

/*
 * Various sanity checks.
 */
static int __cpuinit smp_sanity_check(unsigned max_cpus)
{
	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
		       hard_smp_processor_id());
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * If we couldn't find an SMP configuration at boot time,
	 * get out of here now!
	 */
	if (!smp_found_config) {
		printk(KERN_NOTICE "SMP motherboard not detected.\n");
		disable_smp();
		if (APIC_init_uniprocessor())
			printk(KERN_NOTICE "Local APIC not detected."
					   " Using dummy APIC emulation.\n");
		return -1;
	}

	/*
	 * Should not be necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 */
	if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) {
		printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n",
		       boot_cpu_id);
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * If we couldn't find a local APIC, then get out of here now!
	 */
	if (APIC_INTEGRATED(apic_version[boot_cpu_id]) && !cpu_has_apic) {
		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
			boot_cpu_id);
		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
		nr_ioapics = 0;
		return -1;
	}

	/*
	 * If SMP should be disabled, then really disable it!
	 */
	if (!max_cpus) {
		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
		nr_ioapics = 0;
		return -1;
	}

	return 0;
}

/*
 * Prepare for SMP bootup. The MP table or ACPI has been read
 * earlier. Just do some sanity checking here and enable APIC mode.
 */
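/*
 * Rough flow: apply the user-requested CPU limit (enforce_max_cpus()),
 * fill cpu_present_map and cpu_possible_map from the MP/ACPI data, run
 * the sanity checks, switch the BSP from PIC to APIC mode, bring up the
 * IO-APICs and finally set up the boot CPU's local APIC timer.
 */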
void __cpuinit smp_prepare_cpus(unsigned int max_cpus)
{
	int i;

	nmi_watchdog_default();
	current_cpu_data = boot_cpu_data;
	current_thread_info()->cpu = 0;  /* needed? */

	enforce_max_cpus(max_cpus);

	/*
	 * Fill in cpu_present_mask
	 */
	for (i = 0; i < NR_CPUS; i++) {
		int apicid = cpu_present_to_apicid(i);
		if (physid_isset(apicid, phys_cpu_present_map)) {
			cpu_set(i, cpu_present_map);
			/* possible map would be different if we supported real
			   CPU hotplug. */
			cpu_set(i, cpu_possible_map);
		}
	}

	if (smp_sanity_check(max_cpus) < 0) {
		printk(KERN_INFO "SMP disabled\n");
		disable_smp();
		return;
	}


	/*
	 * Switch from PIC to APIC mode.
	 */
	connect_bsp_APIC();
	setup_local_APIC();

	if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
		      GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
		/* Or can we switch back to PIC here? */
	}

	/*
	 * Now start the IO-APICs
	 */
	if (!skip_ioapic_setup && nr_ioapics)
		setup_IO_APIC();
	else
		nr_ioapics = 0;

	/*
	 * Set up local APIC timer on boot CPU.
	 */

	setup_boot_APIC_clock();
}

/*
 * Early setup to make printk work.
 */
void __init smp_prepare_boot_cpu(void)
{
	int me = smp_processor_id();
	cpu_set(me, cpu_online_map);
	cpu_set(me, cpu_callout_map);
}

/*
 * Entry point to boot a CPU.
 *
 * This is all __cpuinit, not __devinit for now because we don't support
 * CPU hotplug (yet).
 */
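/*
 * do_boot_cpu() kicks the AP via INIT/STARTUP and waits for it to call
 * in; we then spin here until the AP has marked itself online in
 * start_secondary() before returning to the caller.
 */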
int __cpuinit __cpu_up(unsigned int cpu)
{
	int err;
	int apicid = cpu_present_to_apicid(cpu);

	WARN_ON(irqs_disabled());

	Dprintk("++++++++++++++++++++=_---CPU UP  %u\n", cpu);

	if (apicid == BAD_APICID || apicid == boot_cpu_id ||
	    !physid_isset(apicid, phys_cpu_present_map)) {
		printk("__cpu_up: bad cpu %d\n", cpu);
		return -EINVAL;
	}

	/* Boot it! */
	err = do_boot_cpu(cpu, apicid);
	if (err < 0) {
		Dprintk("do_boot_cpu failed %d\n", err);
		return err;
	}

	/* Unleash the CPU! */
	Dprintk("waiting for cpu %d\n", cpu);

	while (!cpu_isset(cpu, cpu_online_map))
		cpu_relax();
	return 0;
}

/*
 * Finish the SMP boot.
 */
void __cpuinit smp_cpus_done(unsigned int max_cpus)
{
	zap_low_mappings();
	smp_cleanup_boot();

#ifdef CONFIG_X86_IO_APIC
	setup_ioapic_dest();
#endif

	detect_siblings();
	time_init_gtod();

	check_nmi_watchdog();
}