/*
 *	x86 SMP booting functions
 *
 *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
 *	Copyright 2001 Andi Kleen, SuSE Labs.
 *
 *	Much of the core SMP work is based on previous work by Thomas Radke, to
 *	whom a great many thanks are extended.
 *
 *	Thanks to Intel for making available several different Pentium,
 *	Pentium Pro and Pentium-II/Xeon MP machines.
 *	Original development of Linux SMP code supported by Caldera.
 *
 *	This code is released under the GNU General Public License version 2
 *
 *	Fixes
 *		Felix Koop	:	NR_CPUS used properly
 *		Jose Renau	:	Handle single CPU case.
 *		Alan Cox	:	By repeated request 8) - Total BogoMIP report.
 *		Greg Wright	:	Fix for kernel stacks panic.
 *		Erich Boleyn	:	MP v1.4 and additional changes.
 *	Matthias Sattler	:	Changes for 2.1 kernel map.
 *	Michel Lespinasse	:	Changes for 2.1 kernel map.
 *	Michael Chastain	:	Change trampoline.S to gnu as.
 *		Alan Cox	:	Dumb bug: 'B' step PPro's are fine
 *		Ingo Molnar	:	Added APIC timers, based on code
 *					from Jose Renau
 *		Ingo Molnar	:	various cleanups and rewrites
 *		Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
 *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs
 *	Andi Kleen		:	Changed for SMP boot into long mode.
 *	Rusty Russell		:	Hacked into shape for new "hotplug" boot process.
 *	Andi Kleen		:	Converted to new state machine.
 *					Various cleanups.
 *					Probably mostly hotplug CPU ready now.
 */


#include <linux/config.h>
#include <linux/init.h>

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/smp_lock.h>
#include <linux/irq.h>
#include <linux/bootmem.h>
#include <linux/thread_info.h>
#include <linux/module.h>

#include <linux/delay.h>
#include <linux/mc146818rtc.h>
#include <asm/mtrr.h>
#include <asm/pgalloc.h>
#include <asm/desc.h>
#include <asm/kdebug.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm/nmi.h>

/* Number of siblings per CPU package */
int smp_num_siblings = 1;
/* Package ID of each logical CPU */
u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
EXPORT_SYMBOL(phys_proc_id);
EXPORT_SYMBOL(cpu_core_id);

/* Bitmask of currently online CPUs */
cpumask_t cpu_online_map;

EXPORT_SYMBOL(cpu_online_map);

/*
 * Private maps to synchronize booting between AP and BP.
 * Probably not needed anymore, but it makes for easier debugging. -AK
 */
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;

cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);

/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;

/* Set when the idlers are all forked */
int smp_threads_ready;

cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_core_map);

/*
 * Trampoline 80x86 program as an array.
 */

extern unsigned char trampoline_data[];
extern unsigned char trampoline_end[];

/*
 * Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller
 * has made sure it's suitably aligned.
 */

static unsigned long __cpuinit setup_trampoline(void)
{
        void *tramp = __va(SMP_TRAMPOLINE_BASE);
        memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
        return virt_to_phys(tramp);
}
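
/*
 * Placement note (inferred from the code rather than stated by the
 * original authors): the trampoline has to live in memory the AP can
 * execute while still in real mode, i.e. below 1MB, and its physical
 * address is later fed into the STARTUP IPI vector as (start_rip >> 12)
 * in wakeup_secondary_via_INIT(), which assumes SMP_TRAMPOLINE_BASE is
 * 4K aligned.
 */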

/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU
 */

static void __cpuinit smp_store_cpu_info(int id)
{
        struct cpuinfo_x86 *c = cpu_data + id;

        *c = boot_cpu_data;
        identify_cpu(c);
        print_cpu_info(c);
}

/*
 * New Funky TSC sync algorithm borrowed from IA64.
 * Main advantage is that it doesn't reset the TSCs fully and
 * in general looks more robust and it works better than my earlier
 * attempts. I believe it was written by David Mosberger. Some minor
 * adjustments for x86-64 by me -AK
 *
 * Original comment reproduced below.
 *
 * Synchronize TSC of the current (slave) CPU with the TSC of the
 * MASTER CPU (normally the time-keeper CPU).  We use a closed loop to
 * eliminate the possibility of unaccounted-for errors (such as
 * getting a machine check in the middle of a calibration step).  The
 * basic idea is for the slave to ask the master what itc value it has
 * and to read its own itc before and after the master responds.  Each
 * iteration gives us three timestamps:
 *
 *	slave		master
 *
 *	t0 ---\
 *             ---\
 *                 --->
 *                        tm
 *                 /---
 *             /---
 *	t1 <---
 *
 *
 * The goal is to adjust the slave's TSC such that tm falls exactly
 * half-way between t0 and t1.  If we achieve this, the clocks are
 * synchronized provided the interconnect between the slave and the
 * master is symmetric.  Even if the interconnect were asymmetric, we
 * would still know that the synchronization error is smaller than the
 * roundtrip latency (t0 - t1).
 *
 * When the interconnect is quiet and symmetric, this lets us
 * synchronize the TSC to within one or two cycles.  However, we can
 * only *guarantee* that the synchronization is accurate to within a
 * round-trip time, which is typically in the range of several hundred
 * cycles (e.g., ~500 cycles).  In practice, this means that the TSCs
 * are usually almost perfectly synchronized, but we shouldn't assume
 * that the accuracy is much better than half a microsecond or so.
 *
 * [there are other errors like the latency of RDTSC and of the
 * WRMSR. These can also account for hundreds of cycles. So it's
 * probably worse. It claims 153 cycles error on a dual Opteron,
 * but I suspect the numbers are actually somewhat worse -AK]
 */

#define MASTER	0
#define SLAVE	(SMP_CACHE_BYTES/8)

/* Intentionally don't use cpu_relax() during TSC synchronization
   because we don't want to go into funky power save modes or cause
   hypervisors to schedule us away.  Going to sleep would likely affect
   latency and low latency is the primary objective here. -AK */
#define no_cpu_relax() barrier()

static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
static volatile __cpuinitdata unsigned long go[SLAVE + 1];
static int notscsync __cpuinitdata;

#undef DEBUG_TSC_SYNC

#define NUM_ROUNDS	64	/* magic value */
#define NUM_ITERS	5	/* likewise */

/* Callback on boot CPU */
static __cpuinit void sync_master(void *arg)
{
        unsigned long flags, i;

        if (smp_processor_id() != boot_cpu_id)
                return;

        go[MASTER] = 0;

        local_irq_save(flags);
        {
                for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
                        while (!go[MASTER])
                                no_cpu_relax();
                        go[MASTER] = 0;
                        rdtscll(go[SLAVE]);
                }
        }
        local_irq_restore(flags);
}

/*
 * Return the number of cycles by which our tsc differs from the tsc
 * on the master (time-keeper) CPU.  A positive number indicates our
 * tsc is ahead of the master, negative that it is behind.
 */
static inline long
get_delta(long *rt, long *master)
{
        unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
        unsigned long tcenter, t0, t1, tm;
        int i;

        for (i = 0; i < NUM_ITERS; ++i) {
                rdtscll(t0);
                go[MASTER] = 1;
                while (!(tm = go[SLAVE]))
                        no_cpu_relax();
                go[SLAVE] = 0;
                rdtscll(t1);

                if (t1 - t0 < best_t1 - best_t0)
                        best_t0 = t0, best_t1 = t1, best_tm = tm;
        }

        *rt = best_t1 - best_t0;
        *master = best_tm - best_t0;

        /* average best_t0 and best_t1 without overflow: */
        tcenter = (best_t0/2 + best_t1/2);
        if (best_t0 % 2 + best_t1 % 2 == 2)
                ++tcenter;
        return tcenter - best_tm;
}
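
/*
 * Worked example of the midpoint calculation above (made-up numbers,
 * purely illustrative): with best_t0 = 1000, best_t1 = 1100 and the
 * master's reply best_tm = 1030, the slave's midpoint is tcenter = 1050.
 * On a symmetric interconnect tm would land at that midpoint, so
 * tcenter - best_tm = +20 means our TSC reads about 20 cycles ahead of
 * the master, and sync_tsc() below compensates by writing TSC + (-delta).
 */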

static __cpuinit void sync_tsc(void)
{
        int i, done = 0;
        long delta, adj, adjust_latency = 0;
        unsigned long flags, rt, master_time_stamp, bound;
#if DEBUG_TSC_SYNC
        static struct syncdebug {
                long rt;	/* roundtrip time */
                long master;	/* master's timestamp */
                long diff;	/* difference between midpoint and master's timestamp */
                long lat;	/* estimate of tsc adjustment latency */
        } t[NUM_ROUNDS] __cpuinitdata;
#endif

        go[MASTER] = 1;

        smp_call_function(sync_master, NULL, 1, 0);

        while (go[MASTER])	/* wait for master to be ready */
                no_cpu_relax();

        spin_lock_irqsave(&tsc_sync_lock, flags);
        {
                for (i = 0; i < NUM_ROUNDS; ++i) {
                        delta = get_delta(&rt, &master_time_stamp);
                        if (delta == 0) {
                                done = 1;	/* let's lock on to this... */
                                bound = rt;
                        }

                        if (!done) {
                                unsigned long t;
                                if (i > 0) {
                                        adjust_latency += -delta;
                                        adj = -delta + adjust_latency/4;
                                } else
                                        adj = -delta;

                                rdtscll(t);
                                wrmsrl(MSR_IA32_TSC, t + adj);
                        }
#if DEBUG_TSC_SYNC
                        t[i].rt = rt;
                        t[i].master = master_time_stamp;
                        t[i].diff = delta;
                        t[i].lat = adjust_latency/4;
#endif
                }
        }
        spin_unlock_irqrestore(&tsc_sync_lock, flags);

#if DEBUG_TSC_SYNC
        for (i = 0; i < NUM_ROUNDS; ++i)
                printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
                       t[i].rt, t[i].master, t[i].diff, t[i].lat);
#endif

        printk(KERN_INFO
               "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
               "maxerr %lu cycles)\n",
               smp_processor_id(), boot_cpu_id, delta, rt);
}

static void __cpuinit tsc_sync_wait(void)
{
        if (notscsync || !cpu_has_tsc)
                return;
        printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
               boot_cpu_id);
        sync_tsc();
}

static __init int notscsync_setup(char *s)
{
        notscsync = 1;
        return 0;
}
__setup("notscsync", notscsync_setup);
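
/*
 * Usage note: booting with "notscsync" on the kernel command line sets
 * notscsync above and makes tsc_sync_wait() return without touching the
 * TSCs, e.g. when the firmware or a hypervisor is assumed to keep them
 * in sync already.
 */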

static atomic_t init_deasserted __cpuinitdata;

/*
 * Report back to the Boot Processor.
 * Running on AP.
 */
void __cpuinit smp_callin(void)
{
        int cpuid, phys_id;
        unsigned long timeout;

        /*
         * If woken up by an INIT in an 82489DX configuration
         * we may get here before an INIT-deassert IPI reaches
         * our local APIC.  We have to wait for the IPI or we'll
         * lock up on an APIC access.
         */
        while (!atomic_read(&init_deasserted))
                cpu_relax();

        /*
         * (This works even if the APIC is not enabled.)
         */
        phys_id = GET_APIC_ID(apic_read(APIC_ID));
        cpuid = smp_processor_id();
        if (cpu_isset(cpuid, cpu_callin_map)) {
                panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
                      phys_id, cpuid);
        }
        Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);

        /*
         * STARTUP IPIs are fragile beasts as they might sometimes
         * trigger some glue motherboard logic.  Complete APIC bus
         * silence for 1 second, this overestimates the time the
         * boot CPU is spending to send the up to 2 STARTUP IPIs
         * by a factor of two.  This should be enough.
         */

        /*
         * Waiting 2s total for startup (udelay is not yet working)
         */
        timeout = jiffies + 2*HZ;
        while (time_before(jiffies, timeout)) {
                /*
                 * Has the boot CPU finished its STARTUP sequence?
                 */
                if (cpu_isset(cpuid, cpu_callout_map))
                        break;
                cpu_relax();
        }

        if (!time_before(jiffies, timeout)) {
                panic("smp_callin: CPU%d started up but did not get a callout!\n",
                      cpuid);
        }

        /*
         * The boot CPU has finished the init stage and is spinning
         * on callin_map until we finish.  We are free to set up this
         * CPU, first the APIC.  (This is probably redundant on most
         * boards.)
         */

        Dprintk("CALLIN, before setup_local_APIC().\n");
        setup_local_APIC();

        /*
         * Get our bogomips.
         */
        calibrate_delay();
        Dprintk("Stack at about %p\n", &cpuid);

        disable_APIC_timer();

        /*
         * Save our processor parameters
         */
        smp_store_cpu_info(cpuid);

        /*
         * Allow the master to continue.
         */
        cpu_set(cpuid, cpu_callin_map);
}
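
/*
 * Boot handshake in brief: do_boot_cpu() on the BP sets the new CPU's
 * bit in cpu_callout_map once the INIT/STARTUP sequence has been sent,
 * smp_callin() above waits for that bit, does its local setup and
 * answers by setting cpu_callin_map, which do_boot_cpu() polls for up
 * to 5 seconds before declaring the AP dead.
 */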

/*
 * Setup code on secondary processor (after coming out of the trampoline)
 */
void __cpuinit start_secondary(void)
{
        /*
         * Don't put anything before smp_callin(), SMP booting is so
         * fragile that we want to limit the things done here to the
         * most necessary things.
         */
        cpu_init();
        smp_callin();

        /* otherwise gcc will move up smp_processor_id before the cpu_init */
        barrier();

        Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
        setup_secondary_APIC_clock();

        Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());

        if (nmi_watchdog == NMI_IO_APIC) {
                disable_8259A_irq(0);
                enable_NMI_through_LVT0(NULL);
                enable_8259A_irq(0);
        }

        enable_APIC_timer();

        /*
         * Allow the master to continue.
         */
        cpu_set(smp_processor_id(), cpu_online_map);
        mb();

        /* Wait for TSC sync; don't schedule anything before this point.
           We still process interrupts, which could see an inconsistent
           time in that window, unfortunately. */
        tsc_sync_wait();

        cpu_idle();
}

extern volatile unsigned long init_rsp;
extern void (*initial_code)(void);

#if APIC_DEBUG
static void inquire_remote_apic(int apicid)
{
        unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
        char *names[] = { "ID", "VERSION", "SPIV" };
        int timeout, status;

        printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);

        for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
                printk("... APIC #%d %s: ", apicid, names[i]);

                /*
                 * Wait for idle.
                 */
                apic_wait_icr_idle();

                apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
                apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);

                timeout = 0;
                do {
                        udelay(100);
                        status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
                } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

                switch (status) {
                case APIC_ICR_RR_VALID:
                        status = apic_read(APIC_RRR);
                        printk("%08x\n", status);
                        break;
                default:
                        printk("failed\n");
                }
        }
}
#endif

/*
 * Kick the secondary to wake up.
 */
static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
{
        unsigned long send_status = 0, accept_status = 0;
        int maxlvt, timeout, num_starts, j;

        Dprintk("Asserting INIT.\n");

        /*
         * Turn INIT on target chip
         */
        apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

        /*
         * Send IPI
         */
        apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
                                | APIC_DM_INIT);

        Dprintk("Waiting for send to finish...\n");
        timeout = 0;
        do {
                Dprintk("+");
                udelay(100);
                send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
        } while (send_status && (timeout++ < 1000));

        mdelay(10);

        Dprintk("Deasserting INIT.\n");

        /* Target chip */
        apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

        /* Send IPI */
        apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);

        Dprintk("Waiting for send to finish...\n");
        timeout = 0;
        do {
                Dprintk("+");
                udelay(100);
                send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
        } while (send_status && (timeout++ < 1000));

        atomic_set(&init_deasserted, 1);

        /*
         * Should we send STARTUP IPIs ?
         *
         * Determine this based on the APIC version.
         * If we don't have an integrated APIC, don't send the STARTUP IPIs.
         */
        if (APIC_INTEGRATED(apic_version[phys_apicid]))
                num_starts = 2;
        else
                num_starts = 0;

        /*
         * Run STARTUP IPI loop.
         */
        Dprintk("#startup loops: %d.\n", num_starts);

        maxlvt = get_maxlvt();

        for (j = 1; j <= num_starts; j++) {
                Dprintk("Sending STARTUP #%d.\n", j);
                apic_read_around(APIC_SPIV);
                apic_write(APIC_ESR, 0);
                apic_read(APIC_ESR);
                Dprintk("After apic_write.\n");

                /*
                 * STARTUP IPI
                 */

                /* Target chip */
                apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

                /* Boot on the stack */
                /* Kick the second */
                apic_write_around(APIC_ICR, APIC_DM_STARTUP
                                        | (start_rip >> 12));

                /*
                 * Give the other CPU some time to accept the IPI.
                 */
                udelay(300);

                Dprintk("Startup point 1.\n");

                Dprintk("Waiting for send to finish...\n");
                timeout = 0;
                do {
                        Dprintk("+");
                        udelay(100);
                        send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
                } while (send_status && (timeout++ < 1000));

                /*
                 * Give the other CPU some time to accept the IPI.
                 */
                udelay(200);
                /*
                 * Due to the Pentium erratum 3AP.
                 */
                if (maxlvt > 3) {
                        apic_read_around(APIC_SPIV);
                        apic_write(APIC_ESR, 0);
                }
                accept_status = (apic_read(APIC_ESR) & 0xEF);
                if (send_status || accept_status)
                        break;
        }
        Dprintk("After Startup.\n");

        if (send_status)
                printk(KERN_ERR "APIC never delivered???\n");
        if (accept_status)
                printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);

        return (send_status | accept_status);
}
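
/*
 * The above is the classic INIT-SIPI-SIPI sequence: assert INIT, wait
 * 10ms, deassert INIT, then (for integrated APICs) send up to two
 * STARTUP IPIs whose vector field is the physical page number of the
 * trampoline (start_rip >> 12), so the AP begins executing in real mode
 * at that page.
 */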

/*
 * Boot one CPU.
 */
static int __cpuinit do_boot_cpu(int cpu, int apicid)
{
        struct task_struct *idle;
        unsigned long boot_error;
        int timeout;
        unsigned long start_rip;
        /*
         * We can't use kernel_thread since we must avoid
         * rescheduling the child.
         */
        idle = fork_idle(cpu);
        if (IS_ERR(idle)) {
                printk("failed fork for CPU %d\n", cpu);
                return PTR_ERR(idle);
        }

        cpu_pda[cpu].pcurrent = idle;

        start_rip = setup_trampoline();

        init_rsp = idle->thread.rsp;
        per_cpu(init_tss, cpu).rsp0 = init_rsp;
        initial_code = start_secondary;
        clear_ti_thread_flag(idle->thread_info, TIF_FORK);

        printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid,
               start_rip, init_rsp);

        /*
         * This grunge runs the startup process for
         * the targeted processor.
         */

        atomic_set(&init_deasserted, 0);

        Dprintk("Setting warm reset code and vector.\n");

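        /*
         * Background on the three writes below (per the classic PC/AT
         * convention): CMOS register 0xF is the BIOS shutdown status
         * byte, and 0xA means "jump via the warm reset vector"; the
         * BIOS data area words at 0x467 (offset) and 0x469 (segment)
         * hold that real-mode vector, which is pointed at the
         * trampoline here.
         */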
        CMOS_WRITE(0xa, 0xf);
        local_flush_tlb();
        Dprintk("1.\n");
        *((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4;
        Dprintk("2.\n");
        *((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf;
        Dprintk("3.\n");

        /*
         * Be paranoid about clearing APIC errors.
         */
        if (APIC_INTEGRATED(apic_version[apicid])) {
                apic_read_around(APIC_SPIV);
                apic_write(APIC_ESR, 0);
                apic_read(APIC_ESR);
        }

        /*
         * Status is now clean
         */
        boot_error = 0;

        /*
         * Starting actual IPI sequence...
         */
        boot_error = wakeup_secondary_via_INIT(apicid, start_rip);

        if (!boot_error) {
                /*
                 * allow APs to start initializing.
                 */
                Dprintk("Before Callout %d.\n", cpu);
                cpu_set(cpu, cpu_callout_map);
                Dprintk("After Callout %d.\n", cpu);

                /*
                 * Wait 5s total for a response
                 */
                for (timeout = 0; timeout < 50000; timeout++) {
                        if (cpu_isset(cpu, cpu_callin_map))
                                break;	/* It has booted */
                        udelay(100);
                }

                if (cpu_isset(cpu, cpu_callin_map)) {
                        /* number CPUs logically, starting from 1 (BSP is 0) */
                        Dprintk("CPU has booted.\n");
                } else {
                        boot_error = 1;
                        if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE))
                                        == 0xA5)
                                /* trampoline started but...? */
                                printk("Stuck ??\n");
                        else
                                /* trampoline code not run */
                                printk("Not responding.\n");
#if APIC_DEBUG
                        inquire_remote_apic(apicid);
#endif
                }
        }
        if (boot_error) {
                cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
                clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
                cpu_clear(cpu, cpu_present_map);
                cpu_clear(cpu, cpu_possible_map);
                x86_cpu_to_apicid[cpu] = BAD_APICID;
                x86_cpu_to_log_apicid[cpu] = BAD_APICID;
                return -EIO;
        }

        return 0;
}

cycles_t cacheflush_time;
unsigned long cache_decay_ticks;

/*
 * Construct cpu_sibling_map[], so that we can tell the sibling CPU
 * on SMT systems efficiently.
 */
static __cpuinit void detect_siblings(void)
{
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                cpus_clear(cpu_sibling_map[cpu]);
                cpus_clear(cpu_core_map[cpu]);
        }

        for_each_online_cpu (cpu) {
                struct cpuinfo_x86 *c = cpu_data + cpu;
                int siblings = 0;
                int i;
                if (smp_num_siblings > 1) {
                        for_each_online_cpu (i) {
                                if (cpu_core_id[cpu] == cpu_core_id[i]) {
                                        siblings++;
                                        cpu_set(i, cpu_sibling_map[cpu]);
                                }
                        }
                } else {
                        siblings++;
                        cpu_set(cpu, cpu_sibling_map[cpu]);
                }

                if (siblings != smp_num_siblings) {
                        printk(KERN_WARNING
                               "WARNING: %d siblings found for CPU%d, should be %d\n",
                               siblings, cpu, smp_num_siblings);
                        smp_num_siblings = siblings;
                }
                if (c->x86_num_cores > 1) {
                        for_each_online_cpu(i) {
                                if (phys_proc_id[cpu] == phys_proc_id[i])
                                        cpu_set(i, cpu_core_map[cpu]);
                        }
                } else
                        cpu_core_map[cpu] = cpu_sibling_map[cpu];
        }
}
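
/*
 * In short: cpu_sibling_map[cpu] ends up holding the online CPUs that
 * share this CPU's cpu_core_id (hyperthread siblings), while
 * cpu_core_map[cpu] holds the online CPUs sharing its phys_proc_id
 * (cores in the same package); on single-core, non-HT parts both maps
 * collapse to just the CPU itself.
 */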

/*
 * Cleanup possible dangling ends...
 */
static __cpuinit void smp_cleanup_boot(void)
{
        /*
         * Paranoid: Set warm reset code and vector here back
         * to default values.
         */
        CMOS_WRITE(0, 0xf);

        /*
         * Reset trampoline flag
         */
        *((volatile int *) phys_to_virt(0x467)) = 0;

#ifndef CONFIG_HOTPLUG_CPU
        /*
         * Free pages reserved for SMP bootup.
         * When you add hotplug CPU support later, remove this.
         * Note there is more work to be done for later CPU bootup.
         */

        free_page((unsigned long) __va(PAGE_SIZE));
        free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
#endif
}

/*
 * Fall back to non-SMP mode after errors.
 *
 * RED-PEN audit/test this more. I bet there is more state messed up here.
 */
static __init void disable_smp(void)
{
        cpu_present_map = cpumask_of_cpu(0);
        cpu_possible_map = cpumask_of_cpu(0);
        if (smp_found_config)
                phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
        else
                phys_cpu_present_map = physid_mask_of_physid(0);
        cpu_set(0, cpu_sibling_map[0]);
        cpu_set(0, cpu_core_map[0]);
}

/*
 * Handle user cpus=... parameter.
 */
static __init void enforce_max_cpus(unsigned max_cpus)
{
        int i, k;
        k = 0;
        for (i = 0; i < NR_CPUS; i++) {
                if (!cpu_possible(i))
                        continue;
                if (++k > max_cpus) {
                        cpu_clear(i, cpu_possible_map);
                        cpu_clear(i, cpu_present_map);
                }
        }
}

/*
 * Various sanity checks.
 */
static int __init smp_sanity_check(unsigned max_cpus)
{
        if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
                printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
                       hard_smp_processor_id());
                physid_set(hard_smp_processor_id(), phys_cpu_present_map);
        }

        /*
         * If we couldn't find an SMP configuration at boot time,
         * get out of here now!
         */
        if (!smp_found_config) {
                printk(KERN_NOTICE "SMP motherboard not detected.\n");
                disable_smp();
                if (APIC_init_uniprocessor())
                        printk(KERN_NOTICE "Local APIC not detected."
                                           " Using dummy APIC emulation.\n");
                return -1;
        }

        /*
         * Should not be necessary because the MP table should list the boot
         * CPU too, but we do it for the sake of robustness anyway.
         */
        if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) {
                printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n",
                       boot_cpu_id);
                physid_set(hard_smp_processor_id(), phys_cpu_present_map);
        }

        /*
         * If we couldn't find a local APIC, then get out of here now!
         */
        if (APIC_INTEGRATED(apic_version[boot_cpu_id]) && !cpu_has_apic) {
                printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
                       boot_cpu_id);
                printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
                nr_ioapics = 0;
                return -1;
        }

        /*
         * If SMP should be disabled, then really disable it!
         */
        if (!max_cpus) {
                printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
                nr_ioapics = 0;
                return -1;
        }

        return 0;
}

/*
 * Prepare for SMP bootup. The MP table or ACPI has been read
 * earlier. Just do some sanity checking here and enable APIC mode.
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
        int i;

        nmi_watchdog_default();
        current_cpu_data = boot_cpu_data;
        current_thread_info()->cpu = 0;  /* needed? */

        enforce_max_cpus(max_cpus);

        /*
         * Fill in cpu_present_map
         */
        for (i = 0; i < NR_CPUS; i++) {
                int apicid = cpu_present_to_apicid(i);
                if (physid_isset(apicid, phys_cpu_present_map)) {
                        cpu_set(i, cpu_present_map);
                        /* possible map would be different if we supported real
                           CPU hotplug. */
                        cpu_set(i, cpu_possible_map);
                }
        }

        if (smp_sanity_check(max_cpus) < 0) {
                printk(KERN_INFO "SMP disabled\n");
                disable_smp();
                return;
        }

        /*
         * Switch from PIC to APIC mode.
         */
        connect_bsp_APIC();
        setup_local_APIC();

        if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
                panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
                      GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
                /* Or can we switch back to PIC here? */
        }

        /*
         * Now start the IO-APICs
         */
        if (!skip_ioapic_setup && nr_ioapics)
                setup_IO_APIC();
        else
                nr_ioapics = 0;

        /*
         * Set up local APIC timer on boot CPU.
         */

        setup_boot_APIC_clock();
}

/*
 * Early setup to make printk work.
 */
void __init smp_prepare_boot_cpu(void)
{
        int me = smp_processor_id();
        cpu_set(me, cpu_online_map);
        cpu_set(me, cpu_callout_map);
}

/*
 * Entry point to boot a CPU.
 *
 * This is all __cpuinit, not __devinit for now because we don't support
 * CPU hotplug (yet).
 */
int __cpuinit __cpu_up(unsigned int cpu)
{
        int err;
        int apicid = cpu_present_to_apicid(cpu);

        WARN_ON(irqs_disabled());

        Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);

        if (apicid == BAD_APICID || apicid == boot_cpu_id ||
            !physid_isset(apicid, phys_cpu_present_map)) {
                printk("__cpu_up: bad cpu %d\n", cpu);
                return -EINVAL;
        }

        /* Boot it! */
        err = do_boot_cpu(cpu, apicid);
        if (err < 0) {
                Dprintk("do_boot_cpu failed %d\n", err);
                return err;
        }

        /* Unleash the CPU! */
        Dprintk("waiting for cpu %d\n", cpu);

        while (!cpu_isset(cpu, cpu_online_map))
                cpu_relax();
        return 0;
}

/*
 * Finish the SMP boot.
 */
void __init smp_cpus_done(unsigned int max_cpus)
{
        zap_low_mappings();
        smp_cleanup_boot();

#ifdef CONFIG_X86_IO_APIC
        setup_ioapic_dest();
#endif

        detect_siblings();
        time_init_gtod();

        check_nmi_watchdog();
}