Glauber de Oliveira Costa | 4fe29a8 | 2008-03-19 14:25:23 -0300 | [diff] [blame] | 1 | #include <linux/kernel.h> |
| 2 | #include <linux/module.h> |
| 3 | #include <linux/init.h> |
| 4 | #include <linux/bootmem.h> |
| 5 | #include <linux/percpu.h> |
| 6 | #include <asm/smp.h> |
| 7 | #include <asm/percpu.h> |
| 8 | #include <asm/sections.h> |
| 9 | #include <asm/processor.h> |
| 10 | #include <asm/setup.h> |
| 11 | #include <asm/topology.h> |
Alexey Starikovskiy | 0fc0906 | 2008-04-04 23:40:48 +0400 | [diff] [blame] | 12 | #include <asm/mpspec.h> |
Alexey Starikovskiy | 76eb413 | 2008-04-04 23:40:41 +0400 | [diff] [blame] | 13 | #include <asm/apicdef.h> |
| 14 | |
#ifdef CONFIG_X86_LOCAL_APIC
/* Processor counters; both start out as zero */
unsigned int num_processors;
unsigned int disabled_cpus __cpuinitdata;

/* APIC ID of the processor doing the boot up; -1U until it is recorded */
unsigned int boot_cpu_physical_apicid = -1U;
EXPORT_SYMBOL(boot_cpu_physical_apicid);

/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
#endif
Alexey Starikovskiy | 0fc0906 | 2008-04-04 23:40:48 +0400 | [diff] [blame] | 25 | |
/*
 * Early per-cpu maps from cpu index to APIC ID.  Every entry starts out
 * as BAD_APICID.
 */

/* cpu index -> physical APIC ID */
DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);

/* cpu index -> APIC ID as reported by the BIOS (presumably; see users) */
DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
| 31 | |
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
/* Shorthand used by the rest of this file for "64-bit NUMA build" */
#define X86_64_NUMA	1

/* cpu index -> node index; every entry starts out as NUMA_NO_NODE */
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);

/*
 * node index -> mask of the logical CPUs on that node.  NULL until
 * setup_node_to_cpumask_map() has allocated the array.
 */
cpumask_t *node_to_cpumask_map;
EXPORT_SYMBOL(node_to_cpumask_map);

/* allocates node_to_cpumask_map; defined further down in this file */
static void __init setup_node_to_cpumask_map(void);

#else
/* no node_to_cpumask_map to set up in this configuration */
static inline void setup_node_to_cpumask_map(void) { }
#endif
| 49 | |
James Bottomley | f8955eb | 2008-05-10 09:01:48 -0500 | [diff] [blame] | 50 | #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) |
Glauber de Oliveira Costa | 4fe29a8 | 2008-03-19 14:25:23 -0300 | [diff] [blame] | 51 | /* |
| 52 | * Copy data used in early init routines from the initial arrays to the |
| 53 | * per cpu data areas. These arrays then become expendable and the |
| 54 | * *_early_ptr's are zeroed indicating that the static arrays are gone. |
| 55 | */ |
| 56 | static void __init setup_per_cpu_maps(void) |
| 57 | { |
| 58 | int cpu; |
| 59 | |
| 60 | for_each_possible_cpu(cpu) { |
Mike Travis | 23ca4bb | 2008-05-12 21:21:12 +0200 | [diff] [blame] | 61 | per_cpu(x86_cpu_to_apicid, cpu) = |
| 62 | early_per_cpu_map(x86_cpu_to_apicid, cpu); |
Mike Travis | b447a46 | 2008-03-25 15:06:51 -0700 | [diff] [blame] | 63 | per_cpu(x86_bios_cpu_apicid, cpu) = |
Mike Travis | 23ca4bb | 2008-05-12 21:21:12 +0200 | [diff] [blame] | 64 | early_per_cpu_map(x86_bios_cpu_apicid, cpu); |
| 65 | #ifdef X86_64_NUMA |
Mike Travis | b447a46 | 2008-03-25 15:06:51 -0700 | [diff] [blame] | 66 | per_cpu(x86_cpu_to_node_map, cpu) = |
Mike Travis | 23ca4bb | 2008-05-12 21:21:12 +0200 | [diff] [blame] | 67 | early_per_cpu_map(x86_cpu_to_node_map, cpu); |
Glauber de Oliveira Costa | 4fe29a8 | 2008-03-19 14:25:23 -0300 | [diff] [blame] | 68 | #endif |
Glauber de Oliveira Costa | 4fe29a8 | 2008-03-19 14:25:23 -0300 | [diff] [blame] | 69 | } |
| 70 | |
| 71 | /* indicate the early static arrays will soon be gone */ |
Mike Travis | 23ca4bb | 2008-05-12 21:21:12 +0200 | [diff] [blame] | 72 | early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; |
| 73 | early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; |
| 74 | #ifdef X86_64_NUMA |
| 75 | early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; |
Glauber de Oliveira Costa | 4fe29a8 | 2008-03-19 14:25:23 -0300 | [diff] [blame] | 76 | #endif |
| 77 | } |
| 78 | |
#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
/* one cpumask per cpu id, each with only that cpu's own bit set */
cpumask_t *cpumask_of_cpu_map __read_mostly;
EXPORT_SYMBOL(cpumask_of_cpu_map);

/*
 * Allocate cpumask_of_cpu_map and set bit N in entry N.
 * nr_cpu_ids must already hold its final value when this runs.
 */
static void __init setup_cpumask_of_cpu(void)
{
	int cpu;

	/* the array starts out all zero: alloc_bootmem zeroes memory */
	cpumask_of_cpu_map =
		alloc_bootmem_low(nr_cpu_ids * sizeof(*cpumask_of_cpu_map));

	for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
		cpumask_t *self = &cpumask_of_cpu_map[cpu];

		cpu_set(cpu, *self);
	}
}
#else
/* nothing to set up without CONFIG_HAVE_CPUMASK_OF_CPU_MAP */
static inline void setup_cpumask_of_cpu(void) { }
#endif
| 96 | |
#ifdef CONFIG_X86_32
/*
 * Per cpu area offsets, one slot per CPU; filled in by
 * setup_per_cpu_areas().
 *
 * Great future not-so-futuristic plan: make i386 and x86_64 do it
 * the same way
 */
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
#endif
| 105 | |
| 106 | /* |
| 107 | * Great future plan: |
| 108 | * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. |
| 109 | * Always point %gs to its beginning |
| 110 | */ |
| 111 | void __init setup_per_cpu_areas(void) |
| 112 | { |
Mike Travis | 9f0e8d0 | 2008-04-04 18:11:01 -0700 | [diff] [blame] | 113 | int i, highest_cpu = 0; |
Glauber de Oliveira Costa | 4fe29a8 | 2008-03-19 14:25:23 -0300 | [diff] [blame] | 114 | unsigned long size; |
| 115 | |
| 116 | #ifdef CONFIG_HOTPLUG_CPU |
| 117 | prefill_possible_map(); |
| 118 | #endif |
| 119 | |
| 120 | /* Copy section for each CPU (we discard the original) */ |
| 121 | size = PERCPU_ENOUGH_ROOM; |
Thomas Gleixner | 5ecddce | 2008-05-08 16:38:11 +0200 | [diff] [blame] | 122 | printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", |
Glauber de Oliveira Costa | 4fe29a8 | 2008-03-19 14:25:23 -0300 | [diff] [blame] | 123 | size); |
Mike Travis | b447a46 | 2008-03-25 15:06:51 -0700 | [diff] [blame] | 124 | |
| 125 | for_each_possible_cpu(i) { |
Glauber de Oliveira Costa | 4fe29a8 | 2008-03-19 14:25:23 -0300 | [diff] [blame] | 126 | char *ptr; |
| 127 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
| 128 | ptr = alloc_bootmem_pages(size); |
| 129 | #else |
| 130 | int node = early_cpu_to_node(i); |
Mike Travis | b447a46 | 2008-03-25 15:06:51 -0700 | [diff] [blame] | 131 | if (!node_online(node) || !NODE_DATA(node)) { |
Glauber de Oliveira Costa | 4fe29a8 | 2008-03-19 14:25:23 -0300 | [diff] [blame] | 132 | ptr = alloc_bootmem_pages(size); |
Mike Travis | b447a46 | 2008-03-25 15:06:51 -0700 | [diff] [blame] | 133 | printk(KERN_INFO |
Mike Travis | 23ca4bb | 2008-05-12 21:21:12 +0200 | [diff] [blame] | 134 | "cpu %d has no node %d or node-local memory\n", |
| 135 | i, node); |
Mike Travis | b447a46 | 2008-03-25 15:06:51 -0700 | [diff] [blame] | 136 | } |
Glauber de Oliveira Costa | 4fe29a8 | 2008-03-19 14:25:23 -0300 | [diff] [blame] | 137 | else |
| 138 | ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); |
| 139 | #endif |
| 140 | if (!ptr) |
| 141 | panic("Cannot allocate cpu data for CPU %d\n", i); |
| 142 | #ifdef CONFIG_X86_64 |
| 143 | cpu_pda(i)->data_offset = ptr - __per_cpu_start; |
| 144 | #else |
| 145 | __per_cpu_offset[i] = ptr - __per_cpu_start; |
| 146 | #endif |
| 147 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); |
Mike Travis | 9f0e8d0 | 2008-04-04 18:11:01 -0700 | [diff] [blame] | 148 | |
| 149 | highest_cpu = i; |
Glauber de Oliveira Costa | 4fe29a8 | 2008-03-19 14:25:23 -0300 | [diff] [blame] | 150 | } |
| 151 | |
Mike Travis | 9f0e8d0 | 2008-04-04 18:11:01 -0700 | [diff] [blame] | 152 | nr_cpu_ids = highest_cpu + 1; |
Mike Travis | 9f248bd | 2008-05-12 21:21:12 +0200 | [diff] [blame^] | 153 | printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", |
| 154 | NR_CPUS, nr_cpu_ids, nr_node_ids); |
Mike Travis | 9f0e8d0 | 2008-04-04 18:11:01 -0700 | [diff] [blame] | 155 | |
Mike Travis | b447a46 | 2008-03-25 15:06:51 -0700 | [diff] [blame] | 156 | /* Setup percpu data maps */ |
Glauber de Oliveira Costa | 4fe29a8 | 2008-03-19 14:25:23 -0300 | [diff] [blame] | 157 | setup_per_cpu_maps(); |
Mike Travis | 9f0e8d0 | 2008-04-04 18:11:01 -0700 | [diff] [blame] | 158 | |
Mike Travis | 9f248bd | 2008-05-12 21:21:12 +0200 | [diff] [blame^] | 159 | /* Setup node to cpumask map */ |
| 160 | setup_node_to_cpumask_map(); |
| 161 | |
Mike Travis | 9f0e8d0 | 2008-04-04 18:11:01 -0700 | [diff] [blame] | 162 | /* Setup cpumask_of_cpu map */ |
| 163 | setup_cpumask_of_cpu(); |
Glauber de Oliveira Costa | 4fe29a8 | 2008-03-19 14:25:23 -0300 | [diff] [blame] | 164 | } |
| 165 | |
| 166 | #endif |
Mike Travis | 23ca4bb | 2008-05-12 21:21:12 +0200 | [diff] [blame] | 167 | |
| 168 | #ifdef X86_64_NUMA |
Mike Travis | 9f248bd | 2008-05-12 21:21:12 +0200 | [diff] [blame^] | 169 | |
| 170 | /* |
| 171 | * Allocate node_to_cpumask_map based on number of available nodes |
| 172 | * Requires node_possible_map to be valid. |
| 173 | * |
| 174 | * Note: node_to_cpumask() is not valid until after this is done. |
| 175 | */ |
| 176 | static void __init setup_node_to_cpumask_map(void) |
| 177 | { |
| 178 | unsigned int node, num = 0; |
| 179 | cpumask_t *map; |
| 180 | |
| 181 | /* setup nr_node_ids if not done yet */ |
| 182 | if (nr_node_ids == MAX_NUMNODES) { |
| 183 | for_each_node_mask(node, node_possible_map) |
| 184 | num = node; |
| 185 | nr_node_ids = num + 1; |
| 186 | } |
| 187 | |
| 188 | /* allocate the map */ |
| 189 | map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); |
| 190 | |
| 191 | Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n", |
| 192 | map, nr_node_ids); |
| 193 | |
| 194 | /* node_to_cpumask() will now work */ |
| 195 | node_to_cpumask_map = map; |
| 196 | } |
| 197 | |
Mike Travis | 23ca4bb | 2008-05-12 21:21:12 +0200 | [diff] [blame] | 198 | void __cpuinit numa_set_node(int cpu, int node) |
| 199 | { |
| 200 | int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); |
| 201 | |
Mike Travis | 7891a24 | 2008-05-12 21:21:12 +0200 | [diff] [blame] | 202 | if (node != NUMA_NO_NODE) |
| 203 | cpu_pda(cpu)->nodenumber = node; |
| 204 | |
Mike Travis | 23ca4bb | 2008-05-12 21:21:12 +0200 | [diff] [blame] | 205 | if (cpu_to_node_map) |
| 206 | cpu_to_node_map[cpu] = node; |
| 207 | |
| 208 | else if (per_cpu_offset(cpu)) |
| 209 | per_cpu(x86_cpu_to_node_map, cpu) = node; |
| 210 | |
| 211 | else |
| 212 | Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu); |
| 213 | } |
| 214 | |
| 215 | void __cpuinit numa_clear_node(int cpu) |
| 216 | { |
| 217 | numa_set_node(cpu, NUMA_NO_NODE); |
| 218 | } |
| 219 | |
Mike Travis | 9f248bd | 2008-05-12 21:21:12 +0200 | [diff] [blame^] | 220 | #ifndef CONFIG_DEBUG_PER_CPU_MAPS |
| 221 | |
Mike Travis | 23ca4bb | 2008-05-12 21:21:12 +0200 | [diff] [blame] | 222 | void __cpuinit numa_add_cpu(int cpu) |
| 223 | { |
| 224 | cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); |
| 225 | } |
| 226 | |
| 227 | void __cpuinit numa_remove_cpu(int cpu) |
| 228 | { |
| 229 | cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); |
| 230 | } |
Mike Travis | 23ca4bb | 2008-05-12 21:21:12 +0200 | [diff] [blame] | 231 | |
Mike Travis | 9f248bd | 2008-05-12 21:21:12 +0200 | [diff] [blame^] | 232 | #else /* CONFIG_DEBUG_PER_CPU_MAPS */ |
| 233 | |
| 234 | /* |
| 235 | * --------- debug versions of the numa functions --------- |
| 236 | */ |
| 237 | static void __cpuinit numa_set_cpumask(int cpu, int enable) |
| 238 | { |
| 239 | int node = cpu_to_node(cpu); |
| 240 | cpumask_t *mask; |
| 241 | char buf[64]; |
| 242 | |
| 243 | if (node_to_cpumask_map == NULL) { |
| 244 | printk(KERN_ERR "node_to_cpumask_map NULL\n"); |
| 245 | dump_stack(); |
| 246 | return; |
| 247 | } |
| 248 | |
| 249 | mask = &node_to_cpumask_map[node]; |
| 250 | if (enable) |
| 251 | cpu_set(cpu, *mask); |
| 252 | else |
| 253 | cpu_clear(cpu, *mask); |
| 254 | |
| 255 | cpulist_scnprintf(buf, sizeof(buf), *mask); |
| 256 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", |
| 257 | enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); |
| 258 | } |
| 259 | |
| 260 | void __cpuinit numa_add_cpu(int cpu) |
| 261 | { |
| 262 | numa_set_cpumask(cpu, 1); |
| 263 | } |
| 264 | |
| 265 | void __cpuinit numa_remove_cpu(int cpu) |
| 266 | { |
| 267 | numa_set_cpumask(cpu, 0); |
| 268 | } |
Mike Travis | 23ca4bb | 2008-05-12 21:21:12 +0200 | [diff] [blame] | 269 | |
| 270 | int cpu_to_node(int cpu) |
| 271 | { |
| 272 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) { |
| 273 | printk(KERN_WARNING |
| 274 | "cpu_to_node(%d): usage too early!\n", cpu); |
| 275 | dump_stack(); |
| 276 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; |
| 277 | } |
| 278 | return per_cpu(x86_cpu_to_node_map, cpu); |
| 279 | } |
| 280 | EXPORT_SYMBOL(cpu_to_node); |
| 281 | |
Mike Travis | 9f248bd | 2008-05-12 21:21:12 +0200 | [diff] [blame^] | 282 | /* |
| 283 | * Same function as cpu_to_node() but used if called before the |
| 284 | * per_cpu areas are setup. |
| 285 | */ |
Mike Travis | 23ca4bb | 2008-05-12 21:21:12 +0200 | [diff] [blame] | 286 | int early_cpu_to_node(int cpu) |
| 287 | { |
| 288 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) |
| 289 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; |
| 290 | |
| 291 | if (!per_cpu_offset(cpu)) { |
| 292 | printk(KERN_WARNING |
| 293 | "early_cpu_to_node(%d): no per_cpu area!\n", cpu); |
Mike Travis | 9f248bd | 2008-05-12 21:21:12 +0200 | [diff] [blame^] | 294 | dump_stack(); |
Mike Travis | 23ca4bb | 2008-05-12 21:21:12 +0200 | [diff] [blame] | 295 | return NUMA_NO_NODE; |
| 296 | } |
| 297 | return per_cpu(x86_cpu_to_node_map, cpu); |
| 298 | } |
Mike Travis | 9f248bd | 2008-05-12 21:21:12 +0200 | [diff] [blame^] | 299 | |
| 300 | /* |
| 301 | * Returns a pointer to the bitmask of CPUs on Node 'node'. |
| 302 | */ |
| 303 | cpumask_t *_node_to_cpumask_ptr(int node) |
| 304 | { |
| 305 | if (node_to_cpumask_map == NULL) { |
| 306 | printk(KERN_WARNING |
| 307 | "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", |
| 308 | node); |
| 309 | dump_stack(); |
| 310 | return &cpu_online_map; |
| 311 | } |
| 312 | return &node_to_cpumask_map[node]; |
| 313 | } |
| 314 | EXPORT_SYMBOL(_node_to_cpumask_ptr); |
| 315 | |
| 316 | /* |
| 317 | * Returns a bitmask of CPUs on Node 'node'. |
| 318 | */ |
| 319 | cpumask_t node_to_cpumask(int node) |
| 320 | { |
| 321 | if (node_to_cpumask_map == NULL) { |
| 322 | printk(KERN_WARNING |
| 323 | "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); |
| 324 | dump_stack(); |
| 325 | return cpu_online_map; |
| 326 | } |
| 327 | return node_to_cpumask_map[node]; |
| 328 | } |
| 329 | EXPORT_SYMBOL(node_to_cpumask); |
| 330 | |
| 331 | /* |
| 332 | * --------- end of debug versions of the numa functions --------- |
| 333 | */ |
| 334 | |
| 335 | #endif /* CONFIG_DEBUG_PER_CPU_MAPS */ |
| 336 | |
| 337 | #endif /* X86_64_NUMA */ |