blob: 5a3f955b6576e2394b37b912925e74a411a542b2 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * x86 SMP booting functions
3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
5 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
6 * Copyright 2001 Andi Kleen, SuSE Labs.
7 *
8 * Much of the core SMP work is based on previous work by Thomas Radke, to
9 * whom a great many thanks are extended.
10 *
11 * Thanks to Intel for making available several different Pentium,
12 * Pentium Pro and Pentium-II/Xeon MP machines.
13 * Original development of Linux SMP code supported by Caldera.
14 *
Andi Kleena8ab26f2005-04-16 15:25:19 -070015 * This code is released under the GNU General Public License version 2
Linus Torvalds1da177e2005-04-16 15:20:36 -070016 *
17 * Fixes
18 * Felix Koop : NR_CPUS used properly
19 * Jose Renau : Handle single CPU case.
20 * Alan Cox : By repeated request 8) - Total BogoMIP report.
21 * Greg Wright : Fix for kernel stacks panic.
22 * Erich Boleyn : MP v1.4 and additional changes.
23 * Matthias Sattler : Changes for 2.1 kernel map.
24 * Michel Lespinasse : Changes for 2.1 kernel map.
25 * Michael Chastain : Change trampoline.S to gnu as.
26 * Alan Cox : Dumb bug: 'B' step PPro's are fine
27 * Ingo Molnar : Added APIC timers, based on code
28 * from Jose Renau
29 * Ingo Molnar : various cleanups and rewrites
30 * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
31 * Maciej W. Rozycki : Bits for genuine 82489DX APICs
32 * Andi Kleen : Changed for SMP boot into long mode.
Andi Kleena8ab26f2005-04-16 15:25:19 -070033 * Rusty Russell : Hacked into shape for new "hotplug" boot process.
34 * Andi Kleen : Converted to new state machine.
35 * Various cleanups.
36 * Probably mostly hotplug CPU ready now.
Ashok Raj76e4f662005-06-25 14:55:00 -070037 * Ashok Raj : CPU hotplug support
Linus Torvalds1da177e2005-04-16 15:20:36 -070038 */
39
Andi Kleena8ab26f2005-04-16 15:25:19 -070040
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/config.h>
42#include <linux/init.h>
43
44#include <linux/mm.h>
45#include <linux/kernel_stat.h>
46#include <linux/smp_lock.h>
47#include <linux/irq.h>
48#include <linux/bootmem.h>
49#include <linux/thread_info.h>
50#include <linux/module.h>
51
52#include <linux/delay.h>
53#include <linux/mc146818rtc.h>
54#include <asm/mtrr.h>
55#include <asm/pgalloc.h>
56#include <asm/desc.h>
57#include <asm/kdebug.h>
58#include <asm/tlbflush.h>
59#include <asm/proto.h>
Andi Kleen75152112005-05-16 21:53:34 -070060#include <asm/nmi.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070061
/* Number of siblings per CPU package (HT threads; 1 means no HT) */
int smp_num_siblings = 1;
/* Package ID of each logical CPU; BAD_APICID until detected */
u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
/* Core ID of each logical CPU; BAD_APICID until detected */
u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
EXPORT_SYMBOL(phys_proc_id);
EXPORT_SYMBOL(cpu_core_id);

/* Bitmask of currently online CPUs */
cpumask_t cpu_online_map;

EXPORT_SYMBOL(cpu_online_map);

/*
 * Private maps to synchronize booting between AP and BP.
 * Probably not needed anymore, but it makes for easier debugging. -AK
 */
cpumask_t cpu_callin_map;	/* APs that have reported in via smp_callin() */
cpumask_t cpu_callout_map;	/* APs the BP has released to continue booting */

cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);

/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;

/* Set when the idlers are all forked */
int smp_threads_ready;

/* HT sibling map and per-package core map, filled by detect_siblings() */
cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_core_map);

/*
 * Trampoline 80x86 program as an array.
 */

extern unsigned char trampoline_data[];
extern unsigned char trampoline_end[];

/* State of each CPU */
DEFINE_PER_CPU(int, cpu_state) = { 0 };

/*
 * Store all idle threads, this can be reused instead of creating
 * a new thread. Also avoids complicated thread destroy functionality
 * for idle threads.
 */
struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;

#define get_idle_for_cpu(x) (idle_thread_array[(x)])
#define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p))

/*
 * cpu_possible_map should be static, it cannot change as cpu's
 * are onlined, or offlined. The reason is per-cpu data-structures
 * are allocated by some modules at init time, and dont expect to
 * do this dynamically on cpu arrival/departure.
 * cpu_present_map on the other hand can change dynamically.
 * In case when cpu_hotplug is not compiled, then we resort to current
 * behaviour, which is cpu_possible == cpu_present.
 * If cpu-hotplug is supported, then we need to preallocate for all
 * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range.
 * - Ashok Raj
 */
#ifdef CONFIG_HOTPLUG_CPU
#define fixup_cpu_possible_map(x)	cpu_set((x), cpu_possible_map)
#else
#define fixup_cpu_possible_map(x)
#endif
132
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133/*
134 * Currently trivial. Write the real->protected mode
135 * bootstrap into the page concerned. The caller
136 * has made sure it's suitably aligned.
137 */
138
Andi Kleena8ab26f2005-04-16 15:25:19 -0700139static unsigned long __cpuinit setup_trampoline(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140{
141 void *tramp = __va(SMP_TRAMPOLINE_BASE);
142 memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
143 return virt_to_phys(tramp);
144}
145
146/*
147 * The bootstrap kernel entry code has set these up. Save them for
148 * a given CPU
149 */
150
Andi Kleena8ab26f2005-04-16 15:25:19 -0700151static void __cpuinit smp_store_cpu_info(int id)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152{
153 struct cpuinfo_x86 *c = cpu_data + id;
154
155 *c = boot_cpu_data;
156 identify_cpu(c);
Andi Kleendda50e72005-05-16 21:53:25 -0700157 print_cpu_info(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158}
159
/*
 * New Funky TSC sync algorithm borrowed from IA64.
 * Main advantage is that it doesn't reset the TSCs fully and
 * in general looks more robust and it works better than my earlier
 * attempts. I believe it was written by David Mosberger. Some minor
 * adjustments for x86-64 by me -AK
 *
 * Original comment reproduced below.
 *
 * Synchronize TSC of the current (slave) CPU with the TSC of the
 * MASTER CPU (normally the time-keeper CPU).  We use a closed loop to
 * eliminate the possibility of unaccounted-for errors (such as
 * getting a machine check in the middle of a calibration step).  The
 * basic idea is for the slave to ask the master what itc value it has
 * and to read its own itc before and after the master responds.  Each
 * iteration gives us three timestamps:
 *
 *	slave		master
 *
 *	t0 ---\
 *             ---\
 *		   --->
 *			tm
 *		   /---
 *	       /---
 *	t1 <---
 *
 *
 * The goal is to adjust the slave's TSC such that tm falls exactly
 * half-way between t0 and t1.  If we achieve this, the clocks are
 * synchronized provided the interconnect between the slave and the
 * master is symmetric.  Even if the interconnect were asymmetric, we
 * would still know that the synchronization error is smaller than the
 * roundtrip latency (t0 - t1).
 *
 * When the interconnect is quiet and symmetric, this lets us
 * synchronize the TSC to within one or two cycles.  However, we can
 * only *guarantee* that the synchronization is accurate to within a
 * round-trip time, which is typically in the range of several hundred
 * cycles (e.g., ~500 cycles).  In practice, this means that the TSCs
 * are usually almost perfectly synchronized, but we shouldn't assume
 * that the accuracy is much better than half a micro second or so.
 *
 * [there are other errors like the latency of RDTSC and of the
 * WRMSR. These can also account to hundreds of cycles. So it's
 * probably worse. It claims 153 cycles error on a dual Opteron,
 * but I suspect the numbers are actually somewhat worse -AK]
 */

/* Indices into go[]: kept a cache line apart to avoid false sharing */
#define MASTER	0
#define SLAVE	(SMP_CACHE_BYTES/8)

/* Intentionally don't use cpu_relax() while TSC synchronization
   because we don't want to go into funky power save modi or cause
   hypervisors to schedule us away.  Going to sleep would likely affect
   latency and low latency is the primary objective here. -AK */
#define no_cpu_relax()	barrier()

/* Serializes slaves so only one CPU syncs against the master at a time */
static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
/* Handshake mailboxes between master and slave (volatile: busy-polled) */
static volatile __cpuinitdata unsigned long go[SLAVE + 1];
/* Set by the "notscsync" boot option to skip TSC synchronization */
static int notscsync __cpuinitdata;

#undef DEBUG_TSC_SYNC

#define NUM_ROUNDS	64	/* magic value */
#define NUM_ITERS	5	/* likewise */
226
/*
 * Callback on boot CPU: answer the slave's timestamp requests.
 * For each request the slave posts in go[MASTER], reply with the
 * master's current TSC in go[SLAVE].  Runs with IRQs off so the
 * round-trip latency stays minimal and undisturbed.
 */
static __cpuinit void sync_master(void *arg)
{
	unsigned long flags, i;

	/* smp_call_function() may run this everywhere; only the BP answers */
	if (smp_processor_id() != boot_cpu_id)
		return;

	go[MASTER] = 0;

	local_irq_save(flags);
	{
		/* One reply per get_delta() iteration across all rounds */
		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
			while (!go[MASTER])
				no_cpu_relax();
			go[MASTER] = 0;
			rdtscll(go[SLAVE]);	/* publish master timestamp */
		}
	}
	local_irq_restore(flags);
}
248
Andi Kleendda50e72005-05-16 21:53:25 -0700249/*
250 * Return the number of cycles by which our tsc differs from the tsc
251 * on the master (time-keeper) CPU. A positive number indicates our
252 * tsc is ahead of the master, negative that it is behind.
253 */
254static inline long
255get_delta(long *rt, long *master)
256{
257 unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
258 unsigned long tcenter, t0, t1, tm;
259 int i;
260
261 for (i = 0; i < NUM_ITERS; ++i) {
262 rdtscll(t0);
263 go[MASTER] = 1;
264 while (!(tm = go[SLAVE]))
265 no_cpu_relax();
266 go[SLAVE] = 0;
267 rdtscll(t1);
268
269 if (t1 - t0 < best_t1 - best_t0)
270 best_t0 = t0, best_t1 = t1, best_tm = tm;
271 }
272
273 *rt = best_t1 - best_t0;
274 *master = best_tm - best_t0;
275
276 /* average best_t0 and best_t1 without overflow: */
277 tcenter = (best_t0/2 + best_t1/2);
278 if (best_t0 % 2 + best_t1 % 2 == 2)
279 ++tcenter;
280 return tcenter - best_tm;
281}
282
/*
 * Closed-loop TSC synchronization, run on the slave CPU.  Repeatedly
 * measures the offset to the master via get_delta() and writes an
 * adjusted value into MSR_IA32_TSC until the offset reads as zero
 * (or NUM_ROUNDS is exhausted).
 */
static __cpuinit void sync_tsc(void)
{
	int i, done = 0;
	long delta, adj, adjust_latency = 0;
	unsigned long flags, rt, master_time_stamp, bound;
#if DEBUG_TSC_SYNC
	static struct syncdebug {
		long rt;	/* roundtrip time */
		long master;	/* master's timestamp */
		long diff;	/* difference between midpoint and master's timestamp */
		long lat;	/* estimate of tsc adjustment latency */
	} t[NUM_ROUNDS] __cpuinitdata;
#endif

	go[MASTER] = 1;

	/* nonatomic=1, wait=0: master spins in sync_master(), don't block */
	smp_call_function(sync_master, NULL, 1, 0);

	while (go[MASTER])	/* wait for master to be ready */
		no_cpu_relax();

	/* one slave at a time; IRQs off to keep measurements clean */
	spin_lock_irqsave(&tsc_sync_lock, flags);
	{
		for (i = 0; i < NUM_ROUNDS; ++i) {
			delta = get_delta(&rt, &master_time_stamp);
			if (delta == 0) {
				done = 1;	/* let's lock on to this... */
				bound = rt;
			}

			if (!done) {
				unsigned long t;
				if (i > 0) {
					/* damp the correction to avoid oscillation */
					adjust_latency += -delta;
					adj = -delta + adjust_latency/4;
				} else
					adj = -delta;

				rdtscll(t);
				wrmsrl(MSR_IA32_TSC, t + adj);
			}
#if DEBUG_TSC_SYNC
			t[i].rt = rt;
			t[i].master = master_time_stamp;
			t[i].diff = delta;
			t[i].lat = adjust_latency/4;
#endif
		}
	}
	spin_unlock_irqrestore(&tsc_sync_lock, flags);

#if DEBUG_TSC_SYNC
	for (i = 0; i < NUM_ROUNDS; ++i)
		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
#endif

	printk(KERN_INFO
	       "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
	       "maxerr %lu cycles)\n",
	       smp_processor_id(), boot_cpu_id, delta, rt);
}
345
346static void __cpuinit tsc_sync_wait(void)
347{
348 if (notscsync || !cpu_has_tsc)
349 return;
350 printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
351 boot_cpu_id);
352 sync_tsc();
353}
354
355static __init int notscsync_setup(char *s)
356{
357 notscsync = 1;
358 return 0;
359}
360__setup("notscsync", notscsync_setup);
361
Andi Kleena8ab26f2005-04-16 15:25:19 -0700362static atomic_t init_deasserted __cpuinitdata;
363
364/*
365 * Report back to the Boot Processor.
366 * Running on AP.
367 */
368void __cpuinit smp_callin(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369{
370 int cpuid, phys_id;
371 unsigned long timeout;
372
373 /*
374 * If waken up by an INIT in an 82489DX configuration
375 * we may get here before an INIT-deassert IPI reaches
376 * our local APIC. We have to wait for the IPI or we'll
377 * lock up on an APIC access.
378 */
Andi Kleena8ab26f2005-04-16 15:25:19 -0700379 while (!atomic_read(&init_deasserted))
380 cpu_relax();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381
382 /*
383 * (This works even if the APIC is not enabled.)
384 */
385 phys_id = GET_APIC_ID(apic_read(APIC_ID));
386 cpuid = smp_processor_id();
387 if (cpu_isset(cpuid, cpu_callin_map)) {
388 panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
389 phys_id, cpuid);
390 }
391 Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
392
393 /*
394 * STARTUP IPIs are fragile beasts as they might sometimes
395 * trigger some glue motherboard logic. Complete APIC bus
396 * silence for 1 second, this overestimates the time the
397 * boot CPU is spending to send the up to 2 STARTUP IPIs
398 * by a factor of two. This should be enough.
399 */
400
401 /*
402 * Waiting 2s total for startup (udelay is not yet working)
403 */
404 timeout = jiffies + 2*HZ;
405 while (time_before(jiffies, timeout)) {
406 /*
407 * Has the boot CPU finished it's STARTUP sequence?
408 */
409 if (cpu_isset(cpuid, cpu_callout_map))
410 break;
Andi Kleena8ab26f2005-04-16 15:25:19 -0700411 cpu_relax();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 }
413
414 if (!time_before(jiffies, timeout)) {
415 panic("smp_callin: CPU%d started up but did not get a callout!\n",
416 cpuid);
417 }
418
419 /*
420 * the boot CPU has finished the init stage and is spinning
421 * on callin_map until we finish. We are free to set up this
422 * CPU, first the APIC. (this is probably redundant on most
423 * boards)
424 */
425
426 Dprintk("CALLIN, before setup_local_APIC().\n");
427 setup_local_APIC();
428
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429 /*
430 * Get our bogomips.
431 */
432 calibrate_delay();
433 Dprintk("Stack at about %p\n",&cpuid);
434
435 disable_APIC_timer();
436
437 /*
438 * Save our processor parameters
439 */
440 smp_store_cpu_info(cpuid);
441
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 /*
443 * Allow the master to continue.
444 */
445 cpu_set(cpuid, cpu_callin_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446}
447
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448/*
Andi Kleena8ab26f2005-04-16 15:25:19 -0700449 * Setup code on secondary processor (after comming out of the trampoline)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 */
Andi Kleena8ab26f2005-04-16 15:25:19 -0700451void __cpuinit start_secondary(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452{
453 /*
454 * Dont put anything before smp_callin(), SMP
455 * booting is too fragile that we want to limit the
456 * things done here to the most necessary things.
457 */
458 cpu_init();
459 smp_callin();
460
461 /* otherwise gcc will move up the smp_processor_id before the cpu_init */
462 barrier();
463
Linus Torvalds1da177e2005-04-16 15:20:36 -0700464 Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
465 setup_secondary_APIC_clock();
466
Andi Kleena8ab26f2005-04-16 15:25:19 -0700467 Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468
469 if (nmi_watchdog == NMI_IO_APIC) {
470 disable_8259A_irq(0);
471 enable_NMI_through_LVT0(NULL);
472 enable_8259A_irq(0);
473 }
474
Andi Kleena8ab26f2005-04-16 15:25:19 -0700475 enable_APIC_timer();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476
477 /*
Andi Kleena8ab26f2005-04-16 15:25:19 -0700478 * Allow the master to continue.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700480 cpu_set(smp_processor_id(), cpu_online_map);
Andi Kleena8ab26f2005-04-16 15:25:19 -0700481 mb();
482
Andi Kleendda50e72005-05-16 21:53:25 -0700483 /* Wait for TSC sync to not schedule things before.
484 We still process interrupts, which could see an inconsistent
485 time in that window unfortunately. */
486 tsc_sync_wait();
487
Linus Torvalds1da177e2005-04-16 15:20:36 -0700488 cpu_idle();
489}
490
Andi Kleena8ab26f2005-04-16 15:25:19 -0700491extern volatile unsigned long init_rsp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492extern void (*initial_code)(void);
493
494#if APIC_DEBUG
Andi Kleena8ab26f2005-04-16 15:25:19 -0700495static void inquire_remote_apic(int apicid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496{
497 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
498 char *names[] = { "ID", "VERSION", "SPIV" };
499 int timeout, status;
500
501 printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
502
503 for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
504 printk("... APIC #%d %s: ", apicid, names[i]);
505
506 /*
507 * Wait for idle.
508 */
509 apic_wait_icr_idle();
510
511 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
512 apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
513
514 timeout = 0;
515 do {
516 udelay(100);
517 status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
518 } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
519
520 switch (status) {
521 case APIC_ICR_RR_VALID:
522 status = apic_read(APIC_RRR);
523 printk("%08x\n", status);
524 break;
525 default:
526 printk("failed\n");
527 }
528 }
529}
530#endif
531
Andi Kleena8ab26f2005-04-16 15:25:19 -0700532/*
533 * Kick the secondary to wake up.
534 */
535static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536{
537 unsigned long send_status = 0, accept_status = 0;
538 int maxlvt, timeout, num_starts, j;
539
540 Dprintk("Asserting INIT.\n");
541
542 /*
543 * Turn INIT on target chip
544 */
545 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
546
547 /*
548 * Send IPI
549 */
550 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
551 | APIC_DM_INIT);
552
553 Dprintk("Waiting for send to finish...\n");
554 timeout = 0;
555 do {
556 Dprintk("+");
557 udelay(100);
558 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
559 } while (send_status && (timeout++ < 1000));
560
561 mdelay(10);
562
563 Dprintk("Deasserting INIT.\n");
564
565 /* Target chip */
566 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
567
568 /* Send IPI */
569 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
570
571 Dprintk("Waiting for send to finish...\n");
572 timeout = 0;
573 do {
574 Dprintk("+");
575 udelay(100);
576 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
577 } while (send_status && (timeout++ < 1000));
578
579 atomic_set(&init_deasserted, 1);
580
581 /*
582 * Should we send STARTUP IPIs ?
583 *
584 * Determine this based on the APIC version.
585 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
586 */
587 if (APIC_INTEGRATED(apic_version[phys_apicid]))
588 num_starts = 2;
589 else
590 num_starts = 0;
591
592 /*
593 * Run STARTUP IPI loop.
594 */
595 Dprintk("#startup loops: %d.\n", num_starts);
596
597 maxlvt = get_maxlvt();
598
599 for (j = 1; j <= num_starts; j++) {
600 Dprintk("Sending STARTUP #%d.\n",j);
601 apic_read_around(APIC_SPIV);
602 apic_write(APIC_ESR, 0);
603 apic_read(APIC_ESR);
604 Dprintk("After apic_write.\n");
605
606 /*
607 * STARTUP IPI
608 */
609
610 /* Target chip */
611 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
612
613 /* Boot on the stack */
614 /* Kick the second */
615 apic_write_around(APIC_ICR, APIC_DM_STARTUP
616 | (start_rip >> 12));
617
618 /*
619 * Give the other CPU some time to accept the IPI.
620 */
621 udelay(300);
622
623 Dprintk("Startup point 1.\n");
624
625 Dprintk("Waiting for send to finish...\n");
626 timeout = 0;
627 do {
628 Dprintk("+");
629 udelay(100);
630 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
631 } while (send_status && (timeout++ < 1000));
632
633 /*
634 * Give the other CPU some time to accept the IPI.
635 */
636 udelay(200);
637 /*
638 * Due to the Pentium erratum 3AP.
639 */
640 if (maxlvt > 3) {
641 apic_read_around(APIC_SPIV);
642 apic_write(APIC_ESR, 0);
643 }
644 accept_status = (apic_read(APIC_ESR) & 0xEF);
645 if (send_status || accept_status)
646 break;
647 }
648 Dprintk("After Startup.\n");
649
650 if (send_status)
651 printk(KERN_ERR "APIC never delivered???\n");
652 if (accept_status)
653 printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
654
655 return (send_status | accept_status);
656}
657
Ashok Raj76e4f662005-06-25 14:55:00 -0700658struct create_idle {
659 struct task_struct *idle;
660 struct completion done;
661 int cpu;
662};
663
664void do_fork_idle(void *_c_idle)
665{
666 struct create_idle *c_idle = _c_idle;
667
668 c_idle->idle = fork_idle(c_idle->cpu);
669 complete(&c_idle->done);
670}
671
Andi Kleena8ab26f2005-04-16 15:25:19 -0700672/*
673 * Boot one CPU.
674 */
675static int __cpuinit do_boot_cpu(int cpu, int apicid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677 unsigned long boot_error;
Andi Kleena8ab26f2005-04-16 15:25:19 -0700678 int timeout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679 unsigned long start_rip;
Ashok Raj76e4f662005-06-25 14:55:00 -0700680 struct create_idle c_idle = {
681 .cpu = cpu,
682 .done = COMPLETION_INITIALIZER(c_idle.done),
683 };
684 DECLARE_WORK(work, do_fork_idle, &c_idle);
685
686 c_idle.idle = get_idle_for_cpu(cpu);
687
688 if (c_idle.idle) {
689 c_idle.idle->thread.rsp = (unsigned long) (((struct pt_regs *)
690 (THREAD_SIZE + (unsigned long) c_idle.idle->thread_info)) - 1);
691 init_idle(c_idle.idle, cpu);
692 goto do_rest;
Andi Kleena8ab26f2005-04-16 15:25:19 -0700693 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694
Ashok Raj76e4f662005-06-25 14:55:00 -0700695 /*
696 * During cold boot process, keventd thread is not spun up yet.
697 * When we do cpu hot-add, we create idle threads on the fly, we should
698 * not acquire any attributes from the calling context. Hence the clean
699 * way to create kernel_threads() is to do that from keventd().
700 * We do the current_is_keventd() due to the fact that ACPI notifier
701 * was also queuing to keventd() and when the caller is already running
702 * in context of keventd(), we would end up with locking up the keventd
703 * thread.
704 */
705 if (!keventd_up() || current_is_keventd())
706 work.func(work.data);
707 else {
708 schedule_work(&work);
709 wait_for_completion(&c_idle.done);
710 }
711
712 if (IS_ERR(c_idle.idle)) {
713 printk("failed fork for CPU %d\n", cpu);
714 return PTR_ERR(c_idle.idle);
715 }
716
717 set_idle_for_cpu(cpu, c_idle.idle);
718
719do_rest:
720
721 cpu_pda[cpu].pcurrent = c_idle.idle;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722
723 start_rip = setup_trampoline();
724
Ashok Raj76e4f662005-06-25 14:55:00 -0700725 init_rsp = c_idle.idle->thread.rsp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726 per_cpu(init_tss,cpu).rsp0 = init_rsp;
727 initial_code = start_secondary;
Ashok Raj76e4f662005-06-25 14:55:00 -0700728 clear_ti_thread_flag(c_idle.idle->thread_info, TIF_FORK);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729
Andi Kleena8ab26f2005-04-16 15:25:19 -0700730 printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731 start_rip, init_rsp);
732
733 /*
734 * This grunge runs the startup process for
735 * the targeted processor.
736 */
737
738 atomic_set(&init_deasserted, 0);
739
740 Dprintk("Setting warm reset code and vector.\n");
741
742 CMOS_WRITE(0xa, 0xf);
743 local_flush_tlb();
744 Dprintk("1.\n");
745 *((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4;
746 Dprintk("2.\n");
747 *((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf;
748 Dprintk("3.\n");
749
750 /*
751 * Be paranoid about clearing APIC errors.
752 */
753 if (APIC_INTEGRATED(apic_version[apicid])) {
754 apic_read_around(APIC_SPIV);
755 apic_write(APIC_ESR, 0);
756 apic_read(APIC_ESR);
757 }
758
759 /*
760 * Status is now clean
761 */
762 boot_error = 0;
763
764 /*
765 * Starting actual IPI sequence...
766 */
Andi Kleena8ab26f2005-04-16 15:25:19 -0700767 boot_error = wakeup_secondary_via_INIT(apicid, start_rip);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768
769 if (!boot_error) {
770 /*
771 * allow APs to start initializing.
772 */
773 Dprintk("Before Callout %d.\n", cpu);
774 cpu_set(cpu, cpu_callout_map);
775 Dprintk("After Callout %d.\n", cpu);
776
777 /*
778 * Wait 5s total for a response
779 */
780 for (timeout = 0; timeout < 50000; timeout++) {
781 if (cpu_isset(cpu, cpu_callin_map))
782 break; /* It has booted */
783 udelay(100);
784 }
785
786 if (cpu_isset(cpu, cpu_callin_map)) {
787 /* number CPUs logically, starting from 1 (BSP is 0) */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788 Dprintk("CPU has booted.\n");
789 } else {
790 boot_error = 1;
791 if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE))
792 == 0xA5)
793 /* trampoline started but...? */
794 printk("Stuck ??\n");
795 else
796 /* trampoline code not run */
797 printk("Not responding.\n");
798#if APIC_DEBUG
799 inquire_remote_apic(apicid);
800#endif
801 }
802 }
803 if (boot_error) {
804 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
805 clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
Andi Kleena8ab26f2005-04-16 15:25:19 -0700806 cpu_clear(cpu, cpu_present_map);
807 cpu_clear(cpu, cpu_possible_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808 x86_cpu_to_apicid[cpu] = BAD_APICID;
809 x86_cpu_to_log_apicid[cpu] = BAD_APICID;
Andi Kleena8ab26f2005-04-16 15:25:19 -0700810 return -EIO;
811 }
812
813 return 0;
814}
815
/* Scheduler cost estimates (left at 0 here; scheduler tunables) */
cycles_t cacheflush_time;
unsigned long cache_decay_ticks;

/*
 * Construct cpu_sibling_map[], so that we can tell the sibling CPU
 * on SMT systems efficiently.  Also builds cpu_core_map[] for
 * multi-core packages; falls back to the sibling map otherwise.
 */
static __cpuinit void detect_siblings(void)
{
	int cpu;

	/* start from a clean slate for all possible CPUs */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		cpus_clear(cpu_sibling_map[cpu]);
		cpus_clear(cpu_core_map[cpu]);
	}

	for_each_online_cpu (cpu) {
		struct cpuinfo_x86 *c = cpu_data + cpu;
		int siblings = 0;
		int i;
		if (smp_num_siblings > 1) {
			/* SMT: siblings share a core id */
			for_each_online_cpu (i) {
				if (cpu_core_id[cpu] == cpu_core_id[i]) {
					siblings++;
					cpu_set(i, cpu_sibling_map[cpu]);
				}
			}
		} else {
			siblings++;
			cpu_set(cpu, cpu_sibling_map[cpu]);
		}

		/* trust what we counted over what the CPUID reported */
		if (siblings != smp_num_siblings) {
			printk(KERN_WARNING
	       "WARNING: %d siblings found for CPU%d, should be %d\n",
			       siblings, cpu, smp_num_siblings);
			smp_num_siblings = siblings;
		}
		if (c->x86_num_cores > 1) {
			/* multi-core: cores share a physical package id */
			for_each_online_cpu(i) {
				if (phys_proc_id[cpu] == phys_proc_id[i])
					cpu_set(i, cpu_core_map[cpu]);
			}
		} else
			cpu_core_map[cpu] = cpu_sibling_map[cpu];
	}
}
863
Andi Kleena8ab26f2005-04-16 15:25:19 -0700864/*
865 * Cleanup possible dangling ends...
866 */
867static __cpuinit void smp_cleanup_boot(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700868{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869 /*
Andi Kleena8ab26f2005-04-16 15:25:19 -0700870 * Paranoid: Set warm reset code and vector here back
871 * to default values.
872 */
873 CMOS_WRITE(0, 0xf);
874
875 /*
876 * Reset trampoline flag
877 */
878 *((volatile int *) phys_to_virt(0x467)) = 0;
879
880#ifndef CONFIG_HOTPLUG_CPU
881 /*
882 * Free pages reserved for SMP bootup.
883 * When you add hotplug CPU support later remove this
884 * Note there is more work to be done for later CPU bootup.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700885 */
886
Andi Kleena8ab26f2005-04-16 15:25:19 -0700887 free_page((unsigned long) __va(PAGE_SIZE));
888 free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
889#endif
890}
891
/*
 * Fall back to non SMP mode after errors.
 *
 * Shrinks all CPU maps down to the boot CPU only.
 *
 * RED-PEN audit/test this more. I bet there is more state messed up here.
 */
static __init void disable_smp(void)
{
	cpu_present_map = cpumask_of_cpu(0);
	cpu_possible_map = cpumask_of_cpu(0);
	/* without an MP table the boot APIC id may be unknown; use 0 */
	if (smp_found_config)
		phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
	else
		phys_cpu_present_map = physid_mask_of_physid(0);
	cpu_set(0, cpu_sibling_map[0]);
	cpu_set(0, cpu_core_map[0]);
}
908
909/*
910 * Handle user cpus=... parameter.
911 */
Ashok Raje6982c62005-06-25 14:54:58 -0700912static __init void enforce_max_cpus(unsigned max_cpus)
Andi Kleena8ab26f2005-04-16 15:25:19 -0700913{
914 int i, k;
915 k = 0;
916 for (i = 0; i < NR_CPUS; i++) {
917 if (!cpu_possible(i))
918 continue;
919 if (++k > max_cpus) {
920 cpu_clear(i, cpu_possible_map);
921 cpu_clear(i, cpu_present_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700922 }
923 }
924}
925
/*
 * Various sanity checks.
 *
 * Returns 0 if SMP bringup may proceed, -1 if the system must run
 * uniprocessor (no MP config, no usable local APIC, or max_cpus=0).
 */
static int __init smp_sanity_check(unsigned max_cpus)
{
	/* the running CPU must appear in the physical present map */
	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
		       hard_smp_processor_id());
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * If we couldn't find an SMP configuration at boot time,
	 * get out of here now!
	 */
	if (!smp_found_config) {
		printk(KERN_NOTICE "SMP motherboard not detected.\n");
		disable_smp();
		if (APIC_init_uniprocessor())
			printk(KERN_NOTICE "Local APIC not detected."
					   " Using dummy APIC emulation.\n");
		return -1;
	}

	/*
	 * Should not be necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 */
	if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) {
		printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n",
								 boot_cpu_id);
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * If we couldn't find a local APIC, then get out of here now!
	 */
	if (APIC_INTEGRATED(apic_version[boot_cpu_id]) && !cpu_has_apic) {
		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
			boot_cpu_id);
		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
		nr_ioapics = 0;
		return -1;
	}

	/*
	 * If SMP should be disabled, then really disable it!
	 */
	if (!max_cpus) {
		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
		nr_ioapics = 0;
		return -1;
	}

	return 0;
}
982
983/*
984 * Prepare for SMP bootup. The MP table or ACPI has been read
985 * earlier. Just do some sanity checking here and enable APIC mode.
986 */
Ashok Raje6982c62005-06-25 14:54:58 -0700987void __init smp_prepare_cpus(unsigned int max_cpus)
Andi Kleena8ab26f2005-04-16 15:25:19 -0700988{
989 int i;
990
991 nmi_watchdog_default();
992 current_cpu_data = boot_cpu_data;
993 current_thread_info()->cpu = 0; /* needed? */
994
995 enforce_max_cpus(max_cpus);
996
997 /*
998 * Fill in cpu_present_mask
999 */
1000 for (i = 0; i < NR_CPUS; i++) {
1001 int apicid = cpu_present_to_apicid(i);
1002 if (physid_isset(apicid, phys_cpu_present_map)) {
1003 cpu_set(i, cpu_present_map);
Andi Kleena8ab26f2005-04-16 15:25:19 -07001004 cpu_set(i, cpu_possible_map);
1005 }
Ashok Raj76e4f662005-06-25 14:55:00 -07001006 fixup_cpu_possible_map(i);
Andi Kleena8ab26f2005-04-16 15:25:19 -07001007 }
1008
1009 if (smp_sanity_check(max_cpus) < 0) {
1010 printk(KERN_INFO "SMP disabled\n");
1011 disable_smp();
1012 return;
1013 }
1014
1015
1016 /*
1017 * Switch from PIC to APIC mode.
1018 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 connect_bsp_APIC();
1020 setup_local_APIC();
1021
Andi Kleena8ab26f2005-04-16 15:25:19 -07001022 if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
1023 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
1024 GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
1025 /* Or can we switch back to PIC here? */
1026 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027
1028 /*
Andi Kleena8ab26f2005-04-16 15:25:19 -07001029 * Now start the IO-APICs
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030 */
1031 if (!skip_ioapic_setup && nr_ioapics)
1032 setup_IO_APIC();
1033 else
1034 nr_ioapics = 0;
1035
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036 /*
Andi Kleena8ab26f2005-04-16 15:25:19 -07001037 * Set up local APIC timer on boot CPU.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001038 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039
Andi Kleena8ab26f2005-04-16 15:25:19 -07001040 setup_boot_APIC_clock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041}
1042
Andi Kleena8ab26f2005-04-16 15:25:19 -07001043/*
1044 * Early setup to make printk work.
1045 */
1046void __init smp_prepare_boot_cpu(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047{
Andi Kleena8ab26f2005-04-16 15:25:19 -07001048 int me = smp_processor_id();
1049 cpu_set(me, cpu_online_map);
1050 cpu_set(me, cpu_callout_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001051}
1052
Andi Kleena8ab26f2005-04-16 15:25:19 -07001053/*
1054 * Entry point to boot a CPU.
Andi Kleena8ab26f2005-04-16 15:25:19 -07001055 */
1056int __cpuinit __cpu_up(unsigned int cpu)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057{
Andi Kleena8ab26f2005-04-16 15:25:19 -07001058 int err;
1059 int apicid = cpu_present_to_apicid(cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060
Andi Kleena8ab26f2005-04-16 15:25:19 -07001061 WARN_ON(irqs_disabled());
1062
1063 Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);
1064
1065 if (apicid == BAD_APICID || apicid == boot_cpu_id ||
1066 !physid_isset(apicid, phys_cpu_present_map)) {
1067 printk("__cpu_up: bad cpu %d\n", cpu);
1068 return -EINVAL;
1069 }
Andi Kleena8ab26f2005-04-16 15:25:19 -07001070
Ashok Raj76e4f662005-06-25 14:55:00 -07001071 /*
1072 * Already booted CPU?
1073 */
1074 if (cpu_isset(cpu, cpu_callin_map)) {
1075 Dprintk("do_boot_cpu %d Already started\n", cpu);
1076 return -ENOSYS;
1077 }
1078
Andi Kleena8ab26f2005-04-16 15:25:19 -07001079 /* Boot it! */
1080 err = do_boot_cpu(cpu, apicid);
1081 if (err < 0) {
Andi Kleena8ab26f2005-04-16 15:25:19 -07001082 Dprintk("do_boot_cpu failed %d\n", err);
1083 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084 }
1085
Linus Torvalds1da177e2005-04-16 15:20:36 -07001086 /* Unleash the CPU! */
1087 Dprintk("waiting for cpu %d\n", cpu);
1088
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089 while (!cpu_isset(cpu, cpu_online_map))
Andi Kleena8ab26f2005-04-16 15:25:19 -07001090 cpu_relax();
Ashok Raj76e4f662005-06-25 14:55:00 -07001091 err = 0;
1092
1093 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094}
1095
Andi Kleena8ab26f2005-04-16 15:25:19 -07001096/*
1097 * Finish the SMP boot.
1098 */
Ashok Raje6982c62005-06-25 14:54:58 -07001099void __init smp_cpus_done(unsigned int max_cpus)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100{
Ashok Raj76e4f662005-06-25 14:55:00 -07001101#ifndef CONFIG_HOTPLUG_CPU
Andi Kleena8ab26f2005-04-16 15:25:19 -07001102 zap_low_mappings();
Ashok Raj76e4f662005-06-25 14:55:00 -07001103#endif
Andi Kleena8ab26f2005-04-16 15:25:19 -07001104 smp_cleanup_boot();
1105
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106#ifdef CONFIG_X86_IO_APIC
1107 setup_ioapic_dest();
1108#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109
Andi Kleena8ab26f2005-04-16 15:25:19 -07001110 detect_siblings();
1111 time_init_gtod();
Andi Kleen75152112005-05-16 21:53:34 -07001112
1113 check_nmi_watchdog();
Andi Kleena8ab26f2005-04-16 15:25:19 -07001114}
Ashok Raj76e4f662005-06-25 14:55:00 -07001115
1116#ifdef CONFIG_HOTPLUG_CPU
1117
1118static void
1119remove_siblinginfo(int cpu)
1120{
1121 int sibling;
1122
1123 for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
1124 cpu_clear(cpu, cpu_sibling_map[sibling]);
1125 for_each_cpu_mask(sibling, cpu_core_map[cpu])
1126 cpu_clear(cpu, cpu_core_map[sibling]);
1127 cpus_clear(cpu_sibling_map[cpu]);
1128 cpus_clear(cpu_core_map[cpu]);
1129 phys_proc_id[cpu] = BAD_APICID;
1130 cpu_core_id[cpu] = BAD_APICID;
1131}
1132
1133void remove_cpu_from_maps(void)
1134{
1135 int cpu = smp_processor_id();
1136
1137 cpu_clear(cpu, cpu_callout_map);
1138 cpu_clear(cpu, cpu_callin_map);
1139 clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
1140}
1141
/*
 * Take the current CPU offline: stop its APIC timer, drain pending timer
 * interrupts, tear down its topology/bringup state and redirect its irqs.
 * Returns 0 on success, -EBUSY for the boot CPU.
 */
int __cpu_disable(void)
{
	int cpu = smp_processor_id();

	/*
	 * Perhaps use cpufreq to drop frequency, but that could go
	 * into generic code.
	 *
	 * We won't take down the boot processor on i386 due to some
	 * interrupts only being able to be serviced by the BSP.
	 * Especially so if we're not using an IOAPIC -zwane
	 */
	if (cpu == 0)
		return -EBUSY;

	disable_APIC_timer();

	/*
	 * HACK:
	 * Allow any queued timer interrupts to get serviced
	 * This is only a temporary solution until we cleanup
	 * fixup_irqs as we do for IA64.
	 */
	local_irq_enable();
	mdelay(1);

	/* From here on we must not be interrupted while we unwind state. */
	local_irq_disable();
	remove_siblinginfo(cpu);

	/* It's now safe to remove this processor from the online map */
	cpu_clear(cpu, cpu_online_map);
	remove_cpu_from_maps();
	/* Re-target interrupts at the CPUs that remain online. */
	fixup_irqs(cpu_online_map);
	return 0;
}
1177
1178void __cpu_die(unsigned int cpu)
1179{
1180 /* We don't do anything here: idle task is faking death itself. */
1181 unsigned int i;
1182
1183 for (i = 0; i < 10; i++) {
1184 /* They ack this in play_dead by setting CPU_DEAD */
1185 if (per_cpu(cpu_state, cpu) == CPU_DEAD)
1186 return;
1187 current->state = TASK_UNINTERRUPTIBLE;
1188 schedule_timeout(HZ/10);
1189 }
1190 printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1191}
1192
1193#else /* ... !CONFIG_HOTPLUG_CPU */
1194
/* CPU hotplug is not configured: offlining a CPU is unsupported. */
int __cpu_disable(void)
{
	return -ENOSYS;
}
1199
/* Never reached: without hotplug, __cpu_disable() above always fails. */
void __cpu_die(unsigned int cpu)
{
	/* We said "no" in __cpu_disable */
	BUG();
}
1205#endif /* CONFIG_HOTPLUG_CPU */