#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/percpu.h>
#include <linux/kexec.h>
#include <asm/smp.h>
#include <asm/percpu.h>
#include <asm/sections.h>
#include <asm/processor.h>
#include <asm/setup.h>
#include <asm/topology.h>
#include <asm/mpspec.h>
#include <asm/apicdef.h>
#include <asm/highmem.h>

#ifdef CONFIG_X86_LOCAL_APIC
unsigned int num_processors;
unsigned disabled_cpus __cpuinitdata;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
unsigned int max_physical_apicid;
EXPORT_SYMBOL(boot_cpu_physical_apicid);

/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
#endif

/* map cpu index to physical APIC ID */
DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
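
/*
 * Note: DEFINE_EARLY_PER_CPU pairs each per-cpu variable with a static
 * NR_CPUS-sized bootstrap array; lookups go through the *_early_ptr
 * until setup_per_cpu_maps() below copies the contents into the real
 * per-cpu area and NULLs the pointer.
 */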

#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
#define X86_64_NUMA 1

/* map cpu index to node index */
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);

/* which logical CPUs are on which nodes */
cpumask_t *node_to_cpumask_map;
EXPORT_SYMBOL(node_to_cpumask_map);

/* setup node_to_cpumask_map */
static void __init setup_node_to_cpumask_map(void);

#else
static inline void setup_node_to_cpumask_map(void) { }
#endif

#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
/*
 * Copy data used in early init routines from the initial arrays to the
 * per cpu data areas. These arrays then become expendable and the
 * *_early_ptr's are zeroed indicating that the static arrays are gone.
 */
static void __init setup_per_cpu_maps(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(x86_cpu_to_apicid, cpu) =
				early_per_cpu_map(x86_cpu_to_apicid, cpu);
		per_cpu(x86_bios_cpu_apicid, cpu) =
				early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#ifdef X86_64_NUMA
		per_cpu(x86_cpu_to_node_map, cpu) =
				early_per_cpu_map(x86_cpu_to_node_map, cpu);
#endif
	}

	/* indicate the early static arrays will soon be gone */
	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#ifdef X86_64_NUMA
	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
}

#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
cpumask_t *cpumask_of_cpu_map __read_mostly;
EXPORT_SYMBOL(cpumask_of_cpu_map);

/* requires nr_cpu_ids to be initialized */
static void __init setup_cpumask_of_cpu(void)
{
	int i;

	/* alloc_bootmem zeroes memory */
	cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
	for (i = 0; i < nr_cpu_ids; i++)
		cpu_set(i, cpumask_of_cpu_map[i]);
}
#else
static inline void setup_cpumask_of_cpu(void) { }
#endif

#ifdef CONFIG_X86_32
/*
 * Great future not-so-futuristic plan: make i386 and x86_64 do it
 * the same way
 */
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
static inline void setup_cpu_pda_map(void) { }

#elif !defined(CONFIG_SMP)
static inline void setup_cpu_pda_map(void) { }

#else /* CONFIG_SMP && CONFIG_X86_64 */

/*
 * Allocate cpu_pda pointer table and array via alloc_bootmem.
 */
static void __init setup_cpu_pda_map(void)
{
	char *pda;
	struct x8664_pda **new_cpu_pda;
	unsigned long size;
	int cpu;

	size = roundup(sizeof(struct x8664_pda), cache_line_size());

	/* allocate cpu_pda array and pointer table */
	{
		unsigned long tsize = nr_cpu_ids * sizeof(void *);
		unsigned long asize = size * (nr_cpu_ids - 1);

		tsize = roundup(tsize, cache_line_size());
		new_cpu_pda = alloc_bootmem(tsize + asize);
		pda = (char *)new_cpu_pda + tsize;
	}

	/* initialize pointer table to static pda's */
	for_each_possible_cpu(cpu) {
		if (cpu == 0) {
			/* leave boot cpu pda in place */
			new_cpu_pda[0] = cpu_pda(0);
			continue;
		}
		new_cpu_pda[cpu] = (struct x8664_pda *)pda;
		new_cpu_pda[cpu]->in_bootmem = 1;
		pda += size;
	}

	/* point to new pointer table */
	_cpu_pda = new_cpu_pda;
}
#endif

/*
 * Great future plan:
 * Declare PDA itself and support (irqstack, tss, pgd) as per-cpu data.
 * Always point %gs to its beginning.
 */
void __init setup_per_cpu_areas(void)
{
	ssize_t size = PERCPU_ENOUGH_ROOM;
	char *ptr;
	int cpu;

#ifdef CONFIG_HOTPLUG_CPU
	prefill_possible_map();
#else
	nr_cpu_ids = num_processors;
#endif

	/* Setup cpu_pda map */
	setup_cpu_pda_map();

	/* Copy section for each CPU (we discard the original) */
	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
			  size);

	for_each_possible_cpu(cpu) {
#ifndef CONFIG_NEED_MULTIPLE_NODES
		ptr = alloc_bootmem_pages(size);
#else
		int node = early_cpu_to_node(cpu);
		if (!node_online(node) || !NODE_DATA(node)) {
			ptr = alloc_bootmem_pages(size);
			printk(KERN_INFO
			       "cpu %d has no node %d or node-local memory\n",
			       cpu, node);
		} else
			ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
#endif
		per_cpu_offset(cpu) = ptr - __per_cpu_start;
		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
	}

	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids: %d\n",
		NR_CPUS, nr_cpu_ids, nr_node_ids);

	/* Setup percpu data maps */
	setup_per_cpu_maps();

	/* Setup node to cpumask map */
	setup_node_to_cpumask_map();

	/* Setup cpumask_of_cpu map */
	setup_cpumask_of_cpu();
}

#endif

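/*
 * Walk the setup_data list passed in by the boot loader (boot protocol
 * 2.09+) and dispatch each block to its parser.  Blocks are mapped with
 * early_ioremap() since they can live anywhere in physical memory.
 */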
void __init parse_setup_data(void)
{
	struct setup_data *data;
	u64 pa_data;

	if (boot_params.hdr.version < 0x0209)
		return;
	pa_data = boot_params.hdr.setup_data;
	while (pa_data) {
		data = early_ioremap(pa_data, PAGE_SIZE);
		switch (data->type) {
		case SETUP_E820_EXT:
			parse_e820_ext(data, pa_data);
			break;
		default:
			break;
		}
#ifndef CONFIG_DEBUG_BOOT_PARAMS
		free_early(pa_data, pa_data + sizeof(*data) + data->len);
#endif
		pa_data = data->next;
		early_iounmap(data, PAGE_SIZE);
	}
}

#ifdef X86_64_NUMA

/*
 * Allocate node_to_cpumask_map based on number of available nodes.
 * Requires node_possible_map to be valid.
 *
 * Note: node_to_cpumask() is not valid until after this is done.
 */
static void __init setup_node_to_cpumask_map(void)
{
	unsigned int node, num = 0;
	cpumask_t *map;

	/* setup nr_node_ids if not done yet */
	if (nr_node_ids == MAX_NUMNODES) {
		for_each_node_mask(node, node_possible_map)
			num = node;
		nr_node_ids = num + 1;
	}

	/* allocate the map */
	map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));

	Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n",
		map, nr_node_ids);

	/* node_to_cpumask() will now work */
	node_to_cpumask_map = map;
}

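/*
 * Record a cpu's node in whichever map is live at this point in boot:
 * the early static array if it still exists, otherwise the cpu's
 * per-cpu variable.  The node is also mirrored into the cpu's PDA.
 */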
void __cpuinit numa_set_node(int cpu, int node)
{
	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);

	if (cpu_pda(cpu) && node != NUMA_NO_NODE)
		cpu_pda(cpu)->nodenumber = node;

	if (cpu_to_node_map)
		cpu_to_node_map[cpu] = node;
	else if (per_cpu_offset(cpu))
		per_cpu(x86_cpu_to_node_map, cpu) = node;
	else
		Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
}

void __cpuinit numa_clear_node(int cpu)
{
	numa_set_node(cpu, NUMA_NO_NODE);
}

#ifndef CONFIG_DEBUG_PER_CPU_MAPS

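/* fast paths: update node_to_cpumask_map with no sanity checking */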
void __cpuinit numa_add_cpu(int cpu)
{
	cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}

void __cpuinit numa_remove_cpu(int cpu)
{
	cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
}

#else /* CONFIG_DEBUG_PER_CPU_MAPS */

/*
 * --------- debug versions of the numa functions ---------
 */
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
	int node = cpu_to_node(cpu);
	cpumask_t *mask;
	char buf[64];

	if (node_to_cpumask_map == NULL) {
		printk(KERN_ERR "node_to_cpumask_map NULL\n");
		dump_stack();
		return;
	}

	mask = &node_to_cpumask_map[node];
	if (enable)
		cpu_set(cpu, *mask);
	else
		cpu_clear(cpu, *mask);

	cpulist_scnprintf(buf, sizeof(buf), *mask);
	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
		enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
}

void __cpuinit numa_add_cpu(int cpu)
{
	numa_set_cpumask(cpu, 1);
}

void __cpuinit numa_remove_cpu(int cpu)
{
	numa_set_cpumask(cpu, 0);
}

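/*
 * Debug version of cpu_to_node(): warns when called while the early
 * map is still live, then falls back to it.
 */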
339int cpu_to_node(int cpu)
340{
341 if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
342 printk(KERN_WARNING
343 "cpu_to_node(%d): usage too early!\n", cpu);
344 dump_stack();
345 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
346 }
347 return per_cpu(x86_cpu_to_node_map, cpu);
348}
349EXPORT_SYMBOL(cpu_to_node);
350
Mike Travis9f248bd2008-05-12 21:21:12 +0200351/*
352 * Same function as cpu_to_node() but used if called before the
353 * per_cpu areas are setup.
354 */
Mike Travis23ca4bb2008-05-12 21:21:12 +0200355int early_cpu_to_node(int cpu)
356{
357 if (early_per_cpu_ptr(x86_cpu_to_node_map))
358 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
359
360 if (!per_cpu_offset(cpu)) {
361 printk(KERN_WARNING
362 "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
Mike Travis9f248bd2008-05-12 21:21:12 +0200363 dump_stack();
Mike Travis23ca4bb2008-05-12 21:21:12 +0200364 return NUMA_NO_NODE;
365 }
366 return per_cpu(x86_cpu_to_node_map, cpu);
367}
Mike Travis9f248bd2008-05-12 21:21:12 +0200368
369/*
370 * Returns a pointer to the bitmask of CPUs on Node 'node'.
371 */
372cpumask_t *_node_to_cpumask_ptr(int node)
373{
374 if (node_to_cpumask_map == NULL) {
375 printk(KERN_WARNING
376 "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
377 node);
378 dump_stack();
379 return &cpu_online_map;
380 }
Vegard Nossum03db1f72008-06-06 16:33:25 +0200381 BUG_ON(node >= nr_node_ids);
Mike Travis9f248bd2008-05-12 21:21:12 +0200382 return &node_to_cpumask_map[node];
383}
384EXPORT_SYMBOL(_node_to_cpumask_ptr);
385
386/*
387 * Returns a bitmask of CPUs on Node 'node'.
388 */
389cpumask_t node_to_cpumask(int node)
390{
391 if (node_to_cpumask_map == NULL) {
392 printk(KERN_WARNING
393 "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
394 dump_stack();
395 return cpu_online_map;
396 }
Vegard Nossum03db1f72008-06-06 16:33:25 +0200397 BUG_ON(node >= nr_node_ids);
Mike Travis9f248bd2008-05-12 21:21:12 +0200398 return node_to_cpumask_map[node];
399}
400EXPORT_SYMBOL(node_to_cpumask);
401
402/*
403 * --------- end of debug versions of the numa functions ---------
404 */
405
406#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
407
408#endif /* X86_64_NUMA */
Bernhard Walle1ecd2762008-06-20 15:38:22 +0200409
410
411/*
412 * --------- Crashkernel reservation ------------------------------
413 */
414
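/*
 * Total usable RAM in bytes: lowmem plus, under CONFIG_HIGHMEM, the
 * highmem range.  Passed to parse_crashkernel(), which can size the
 * reservation relative to total memory.
 */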
static inline unsigned long long get_total_mem(void)
{
	unsigned long long total;

	total = max_low_pfn - min_low_pfn;
#ifdef CONFIG_HIGHMEM
	total += highend_pfn - highstart_pfn;
#endif

	return total << PAGE_SHIFT;
}

#ifdef CONFIG_KEXEC
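/*
 * Reserve the region requested via crashkernel=size@offset on the
 * kernel command line so a crash kernel can later be loaded there;
 * the range is claimed from bootmem and published as crashk_res.
 */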
void __init reserve_crashkernel(void)
{
	unsigned long long total_mem;
	unsigned long long crash_size, crash_base;
	int ret;

	total_mem = get_total_mem();

	ret = parse_crashkernel(boot_command_line, total_mem,
			&crash_size, &crash_base);
	if (ret == 0 && crash_size > 0) {
		if (crash_base == 0) {
			printk(KERN_INFO "crashkernel reservation failed - "
					"you have to specify a base address\n");
			return;
		}

		if (reserve_bootmem_generic(crash_base, crash_size,
					BOOTMEM_EXCLUSIVE) < 0) {
			printk(KERN_INFO "crashkernel reservation failed - "
					"memory is in use\n");
			return;
		}

		printk(KERN_INFO "Reserving %luMB of memory at %luMB "
				"for crashkernel (System RAM: %luMB)\n",
				(unsigned long)(crash_size >> 20),
				(unsigned long)(crash_base >> 20),
				(unsigned long)(total_mem >> 20));

		crashk_res.start = crash_base;
		crashk_res.end   = crash_base + crash_size - 1;
		insert_resource(&iomem_resource, &crashk_res);
	}
}
#else
void __init reserve_crashkernel(void)
{}
#endif