| Rusty Russell | 71ee73e | 2009-03-13 14:49:52 +1030 | [diff] [blame] | 1 | /* Common code for 32 and 64-bit NUMA */ | 
|  | 2 | #include <linux/topology.h> | 
|  | 3 | #include <linux/module.h> | 
|  | 4 | #include <linux/bootmem.h> | 
| Jan Beulich | 9032160 | 2011-01-19 08:57:21 +0000 | [diff] [blame] | 5 | #include <asm/numa.h> | 
|  | 6 | #include <asm/acpi.h> | 
|  | 7 |  | 
|  | 8 | int __initdata numa_off; | 
|  | 9 |  | 
|  | 10 | static __init int numa_setup(char *opt) | 
|  | 11 | { | 
|  | 12 | if (!opt) | 
|  | 13 | return -EINVAL; | 
|  | 14 | if (!strncmp(opt, "off", 3)) | 
|  | 15 | numa_off = 1; | 
|  | 16 | #ifdef CONFIG_NUMA_EMU | 
|  | 17 | if (!strncmp(opt, "fake=", 5)) | 
|  | 18 | numa_emu_cmdline(opt + 5); | 
|  | 19 | #endif | 
|  | 20 | #ifdef CONFIG_ACPI_NUMA | 
|  | 21 | if (!strncmp(opt, "noacpi", 6)) | 
|  | 22 | acpi_numa = -1; | 
|  | 23 | #endif | 
|  | 24 | return 0; | 
|  | 25 | } | 
|  | 26 | early_param("numa", numa_setup); | 
| Rusty Russell | 71ee73e | 2009-03-13 14:49:52 +1030 | [diff] [blame] | 27 |  | 
| Rusty Russell | 71ee73e | 2009-03-13 14:49:52 +1030 | [diff] [blame] | 28 | /* | 
| Tejun Heo | bbc9e2f | 2011-01-23 14:37:39 +0100 | [diff] [blame] | 29 | * apicid, cpu, node mappings | 
| Rusty Russell | 71ee73e | 2009-03-13 14:49:52 +1030 | [diff] [blame] | 30 | */ | 
| Tejun Heo | bbc9e2f | 2011-01-23 14:37:39 +0100 | [diff] [blame] | 31 | s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | 
|  | 32 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE | 
|  | 33 | }; | 
|  | 34 |  | 
| Rusty Russell | c032ef60 | 2009-03-13 14:49:53 +1030 | [diff] [blame] | 35 | cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; | 
| Rusty Russell | 71ee73e | 2009-03-13 14:49:52 +1030 | [diff] [blame] | 36 | EXPORT_SYMBOL(node_to_cpumask_map); | 
|  | 37 |  | 
|  | 38 | /* | 
| Tejun Heo | 645a791 | 2011-01-23 14:37:40 +0100 | [diff] [blame] | 39 | * Map cpu index to node index | 
|  | 40 | */ | 
| Tejun Heo | 645a791 | 2011-01-23 14:37:40 +0100 | [diff] [blame] | 41 | DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); | 
| Tejun Heo | 645a791 | 2011-01-23 14:37:40 +0100 | [diff] [blame] | 42 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); | 
|  | 43 |  | 
|  | 44 | void __cpuinit numa_set_node(int cpu, int node) | 
|  | 45 | { | 
|  | 46 | int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); | 
|  | 47 |  | 
|  | 48 | /* early setting, no percpu area yet */ | 
|  | 49 | if (cpu_to_node_map) { | 
|  | 50 | cpu_to_node_map[cpu] = node; | 
|  | 51 | return; | 
|  | 52 | } | 
|  | 53 |  | 
|  | 54 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | 
|  | 55 | if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) { | 
|  | 56 | printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); | 
|  | 57 | dump_stack(); | 
|  | 58 | return; | 
|  | 59 | } | 
|  | 60 | #endif | 
|  | 61 | per_cpu(x86_cpu_to_node_map, cpu) = node; | 
|  | 62 |  | 
|  | 63 | if (node != NUMA_NO_NODE) | 
|  | 64 | set_cpu_numa_node(cpu, node); | 
|  | 65 | } | 
|  | 66 |  | 
|  | 67 | void __cpuinit numa_clear_node(int cpu) | 
|  | 68 | { | 
|  | 69 | numa_set_node(cpu, NUMA_NO_NODE); | 
|  | 70 | } | 
|  | 71 |  | 
|  | 72 | /* | 
| Rusty Russell | 71ee73e | 2009-03-13 14:49:52 +1030 | [diff] [blame] | 73 | * Allocate node_to_cpumask_map based on number of available nodes | 
|  | 74 | * Requires node_possible_map to be valid. | 
|  | 75 | * | 
|  | 76 | * Note: node_to_cpumask() is not valid until after this is done. | 
|  | 77 | * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) | 
|  | 78 | */ | 
|  | 79 | void __init setup_node_to_cpumask_map(void) | 
|  | 80 | { | 
|  | 81 | unsigned int node, num = 0; | 
| Rusty Russell | 71ee73e | 2009-03-13 14:49:52 +1030 | [diff] [blame] | 82 |  | 
|  | 83 | /* setup nr_node_ids if not done yet */ | 
|  | 84 | if (nr_node_ids == MAX_NUMNODES) { | 
|  | 85 | for_each_node_mask(node, node_possible_map) | 
|  | 86 | num = node; | 
|  | 87 | nr_node_ids = num + 1; | 
|  | 88 | } | 
|  | 89 |  | 
|  | 90 | /* allocate the map */ | 
| Rusty Russell | c032ef60 | 2009-03-13 14:49:53 +1030 | [diff] [blame] | 91 | for (node = 0; node < nr_node_ids; node++) | 
|  | 92 | alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); | 
| Rusty Russell | 71ee73e | 2009-03-13 14:49:52 +1030 | [diff] [blame] | 93 |  | 
| Rusty Russell | c032ef60 | 2009-03-13 14:49:53 +1030 | [diff] [blame] | 94 | /* cpumask_of_node() will now work */ | 
|  | 95 | pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); | 
| Rusty Russell | 71ee73e | 2009-03-13 14:49:52 +1030 | [diff] [blame] | 96 | } | 
|  | 97 |  | 
| Tejun Heo | 8db78cc | 2011-01-23 14:37:42 +0100 | [diff] [blame] | 98 | /* | 
|  | 99 | * There are unfortunately some poorly designed mainboards around that | 
|  | 100 | * only connect memory to a single CPU. This breaks the 1:1 cpu->node | 
|  | 101 | * mapping. To avoid this fill in the mapping for all possible CPUs, | 
|  | 102 | * as the number of CPUs is not known yet. We round robin the existing | 
|  | 103 | * nodes. | 
|  | 104 | */ | 
|  | 105 | void __init numa_init_array(void) | 
|  | 106 | { | 
|  | 107 | int rr, i; | 
|  | 108 |  | 
|  | 109 | rr = first_node(node_online_map); | 
|  | 110 | for (i = 0; i < nr_cpu_ids; i++) { | 
|  | 111 | if (early_cpu_to_node(i) != NUMA_NO_NODE) | 
|  | 112 | continue; | 
|  | 113 | numa_set_node(i, rr); | 
|  | 114 | rr = next_node(rr, node_online_map); | 
|  | 115 | if (rr == MAX_NUMNODES) | 
|  | 116 | rr = first_node(node_online_map); | 
|  | 117 | } | 
|  | 118 | } | 
|  | 119 |  | 
|  | 120 | static __init int find_near_online_node(int node) | 
|  | 121 | { | 
|  | 122 | int n, val; | 
|  | 123 | int min_val = INT_MAX; | 
|  | 124 | int best_node = -1; | 
|  | 125 |  | 
|  | 126 | for_each_online_node(n) { | 
|  | 127 | val = node_distance(node, n); | 
|  | 128 |  | 
|  | 129 | if (val < min_val) { | 
|  | 130 | min_val = val; | 
|  | 131 | best_node = n; | 
|  | 132 | } | 
|  | 133 | } | 
|  | 134 |  | 
|  | 135 | return best_node; | 
|  | 136 | } | 
|  | 137 |  | 
|  | 138 | /* | 
|  | 139 | * Setup early cpu_to_node. | 
|  | 140 | * | 
|  | 141 | * Populate cpu_to_node[] only if x86_cpu_to_apicid[], | 
|  | 142 | * and apicid_to_node[] tables have valid entries for a CPU. | 
|  | 143 | * This means we skip cpu_to_node[] initialisation for NUMA | 
|  | 144 | * emulation and faking node case (when running a kernel compiled | 
|  | 145 | * for NUMA on a non NUMA box), which is OK as cpu_to_node[] | 
|  | 146 | * is already initialized in a round robin manner at numa_init_array, | 
|  | 147 | * prior to this call, and this initialization is good enough | 
|  | 148 | * for the fake NUMA cases. | 
|  | 149 | * | 
|  | 150 | * Called before the per_cpu areas are setup. | 
|  | 151 | */ | 
|  | 152 | void __init init_cpu_to_node(void) | 
|  | 153 | { | 
|  | 154 | int cpu; | 
|  | 155 | u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); | 
|  | 156 |  | 
|  | 157 | BUG_ON(cpu_to_apicid == NULL); | 
|  | 158 |  | 
|  | 159 | for_each_possible_cpu(cpu) { | 
|  | 160 | int node = numa_cpu_node(cpu); | 
|  | 161 |  | 
|  | 162 | if (node == NUMA_NO_NODE) | 
|  | 163 | continue; | 
|  | 164 | if (!node_online(node)) | 
|  | 165 | node = find_near_online_node(node); | 
|  | 166 | numa_set_node(cpu, node); | 
|  | 167 | } | 
|  | 168 | } | 
|  | 169 |  | 
| Tejun Heo | de2d944 | 2011-01-23 14:37:41 +0100 | [diff] [blame] | 170 | #ifndef CONFIG_DEBUG_PER_CPU_MAPS | 
|  | 171 |  | 
|  | 172 | # ifndef CONFIG_NUMA_EMU | 
|  | 173 | void __cpuinit numa_add_cpu(int cpu) | 
|  | 174 | { | 
|  | 175 | cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); | 
|  | 176 | } | 
|  | 177 |  | 
|  | 178 | void __cpuinit numa_remove_cpu(int cpu) | 
|  | 179 | { | 
|  | 180 | cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); | 
|  | 181 | } | 
|  | 182 | # endif	/* !CONFIG_NUMA_EMU */ | 
|  | 183 |  | 
|  | 184 | #else	/* !CONFIG_DEBUG_PER_CPU_MAPS */ | 
| Tejun Heo | 645a791 | 2011-01-23 14:37:40 +0100 | [diff] [blame] | 185 |  | 
|  | 186 | int __cpu_to_node(int cpu) | 
|  | 187 | { | 
|  | 188 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) { | 
|  | 189 | printk(KERN_WARNING | 
|  | 190 | "cpu_to_node(%d): usage too early!\n", cpu); | 
|  | 191 | dump_stack(); | 
|  | 192 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | 
|  | 193 | } | 
|  | 194 | return per_cpu(x86_cpu_to_node_map, cpu); | 
|  | 195 | } | 
|  | 196 | EXPORT_SYMBOL(__cpu_to_node); | 
|  | 197 |  | 
|  | 198 | /* | 
|  | 199 | * Same function as cpu_to_node() but used if called before the | 
|  | 200 | * per_cpu areas are setup. | 
|  | 201 | */ | 
|  | 202 | int early_cpu_to_node(int cpu) | 
|  | 203 | { | 
|  | 204 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) | 
|  | 205 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | 
|  | 206 |  | 
|  | 207 | if (!cpu_possible(cpu)) { | 
|  | 208 | printk(KERN_WARNING | 
|  | 209 | "early_cpu_to_node(%d): no per_cpu area!\n", cpu); | 
|  | 210 | dump_stack(); | 
|  | 211 | return NUMA_NO_NODE; | 
|  | 212 | } | 
|  | 213 | return per_cpu(x86_cpu_to_node_map, cpu); | 
|  | 214 | } | 
|  | 215 |  | 
| Tejun Heo | de2d944 | 2011-01-23 14:37:41 +0100 | [diff] [blame] | 216 | struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable) | 
|  | 217 | { | 
|  | 218 | int node = early_cpu_to_node(cpu); | 
|  | 219 | struct cpumask *mask; | 
|  | 220 | char buf[64]; | 
|  | 221 |  | 
| David Rientjes | 14392fd | 2011-02-07 14:08:53 -0800 | [diff] [blame] | 222 | if (node == NUMA_NO_NODE) { | 
|  | 223 | /* early_cpu_to_node() already emits a warning and trace */ | 
|  | 224 | return NULL; | 
|  | 225 | } | 
| Tejun Heo | de2d944 | 2011-01-23 14:37:41 +0100 | [diff] [blame] | 226 | mask = node_to_cpumask_map[node]; | 
|  | 227 | if (!mask) { | 
|  | 228 | pr_err("node_to_cpumask_map[%i] NULL\n", node); | 
|  | 229 | dump_stack(); | 
|  | 230 | return NULL; | 
|  | 231 | } | 
|  | 232 |  | 
|  | 233 | cpulist_scnprintf(buf, sizeof(buf), mask); | 
|  | 234 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", | 
|  | 235 | enable ? "numa_add_cpu" : "numa_remove_cpu", | 
|  | 236 | cpu, node, buf); | 
|  | 237 | return mask; | 
|  | 238 | } | 
|  | 239 |  | 
|  | 240 | # ifndef CONFIG_NUMA_EMU | 
|  | 241 | static void __cpuinit numa_set_cpumask(int cpu, int enable) | 
|  | 242 | { | 
|  | 243 | struct cpumask *mask; | 
|  | 244 |  | 
|  | 245 | mask = debug_cpumask_set_cpu(cpu, enable); | 
|  | 246 | if (!mask) | 
|  | 247 | return; | 
|  | 248 |  | 
|  | 249 | if (enable) | 
|  | 250 | cpumask_set_cpu(cpu, mask); | 
|  | 251 | else | 
|  | 252 | cpumask_clear_cpu(cpu, mask); | 
|  | 253 | } | 
|  | 254 |  | 
|  | 255 | void __cpuinit numa_add_cpu(int cpu) | 
|  | 256 | { | 
|  | 257 | numa_set_cpumask(cpu, 1); | 
|  | 258 | } | 
|  | 259 |  | 
|  | 260 | void __cpuinit numa_remove_cpu(int cpu) | 
|  | 261 | { | 
|  | 262 | numa_set_cpumask(cpu, 0); | 
|  | 263 | } | 
|  | 264 | # endif	/* !CONFIG_NUMA_EMU */ | 
|  | 265 |  | 
| Rusty Russell | 71ee73e | 2009-03-13 14:49:52 +1030 | [diff] [blame] | 266 | /* | 
|  | 267 | * Returns a pointer to the bitmask of CPUs on Node 'node'. | 
|  | 268 | */ | 
| Rusty Russell | 73e907d | 2009-03-13 14:49:57 +1030 | [diff] [blame] | 269 | const struct cpumask *cpumask_of_node(int node) | 
| Rusty Russell | 71ee73e | 2009-03-13 14:49:52 +1030 | [diff] [blame] | 270 | { | 
| Rusty Russell | 71ee73e | 2009-03-13 14:49:52 +1030 | [diff] [blame] | 271 | if (node >= nr_node_ids) { | 
|  | 272 | printk(KERN_WARNING | 
|  | 273 | "cpumask_of_node(%d): node > nr_node_ids(%d)\n", | 
|  | 274 | node, nr_node_ids); | 
|  | 275 | dump_stack(); | 
|  | 276 | return cpu_none_mask; | 
|  | 277 | } | 
| Rusty Russell | c032ef60 | 2009-03-13 14:49:53 +1030 | [diff] [blame] | 278 | if (node_to_cpumask_map[node] == NULL) { | 
|  | 279 | printk(KERN_WARNING | 
|  | 280 | "cpumask_of_node(%d): no node_to_cpumask_map!\n", | 
|  | 281 | node); | 
|  | 282 | dump_stack(); | 
|  | 283 | return cpu_online_mask; | 
|  | 284 | } | 
| Rusty Russell | 0b96625 | 2009-03-13 23:42:42 +1030 | [diff] [blame] | 285 | return node_to_cpumask_map[node]; | 
| Rusty Russell | 71ee73e | 2009-03-13 14:49:52 +1030 | [diff] [blame] | 286 | } | 
|  | 287 | EXPORT_SYMBOL(cpumask_of_node); | 
| Tejun Heo | 645a791 | 2011-01-23 14:37:40 +0100 | [diff] [blame] | 288 |  | 
| Tejun Heo | de2d944 | 2011-01-23 14:37:41 +0100 | [diff] [blame] | 289 | #endif	/* !CONFIG_DEBUG_PER_CPU_MAPS */ |