Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Some of the code in this file has been gleaned from the 64 bit |
| 3 | * discontigmem support code base. |
| 4 | * |
| 5 | * Copyright (C) 2002, IBM Corp. |
| 6 | * |
| 7 | * All rights reserved. |
| 8 | * |
| 9 | * This program is free software; you can redistribute it and/or modify |
| 10 | * it under the terms of the GNU General Public License as published by |
| 11 | * the Free Software Foundation; either version 2 of the License, or |
| 12 | * (at your option) any later version. |
| 13 | * |
| 14 | * This program is distributed in the hope that it will be useful, but |
| 15 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or |
| 17 | * NON INFRINGEMENT. See the GNU General Public License for more |
| 18 | * details. |
| 19 | * |
| 20 | * You should have received a copy of the GNU General Public License |
| 21 | * along with this program; if not, write to the Free Software |
| 22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
| 23 | * |
| 24 | * Send feedback to Pat Gaughen <gone@us.ibm.com> |
| 25 | */ |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 26 | #include <linux/mm.h> |
| 27 | #include <linux/bootmem.h> |
| 28 | #include <linux/mmzone.h> |
| 29 | #include <linux/acpi.h> |
| 30 | #include <linux/nodemask.h> |
| 31 | #include <asm/srat.h> |
| 32 | #include <asm/topology.h> |
| 33 | |
/*
 * Proximity bitmap helpers.
 *
 * A bitmap is an array of bytes: bit N is stored in byte N / 8 at bit
 * position N % 8.  BMAP_SET and BMAP_TEST evaluate their "bit" argument
 * more than once, so do not pass an expression with side effects.
 */
#define NODE_ARRAY_INDEX(x)	((x) / 8)	/* byte that holds bit x */
#define NODE_ARRAY_OFFSET(x)	((x) % 8)	/* position of bit x in that byte */
#define BMAP_SET(bmap, bit)	\
	((bmap)[NODE_ARRAY_INDEX(bit)] |= (1 << NODE_ARRAY_OFFSET(bit)))
#define BMAP_TEST(bmap, bit)	\
	((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 41 | /* bitmap length; _PXM is at most 255 */ |
| 42 | #define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) |
| 43 | static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ |
| 44 | |
Christoph Lameter | b9b1578 | 2006-09-25 23:31:16 -0700 | [diff] [blame] | 45 | #define MAX_CHUNKS_PER_NODE 3 |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 46 | #define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES) |
| 47 | struct node_memory_chunk_s { |
| 48 | unsigned long start_pfn; |
| 49 | unsigned long end_pfn; |
| 50 | u8 pxm; // proximity domain of node |
| 51 | u8 nid; // which cnode contains this chunk? |
| 52 | u8 bank; // which mem bank on this node |
| 53 | }; |
| 54 | static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS]; |
| 55 | |
| 56 | static int num_memory_chunks; /* total number of memory chunks */ |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 57 | |
/*
 * Defined outside this file.  Used below to obtain pointers to the RSDT and
 * the tables it lists; presumably (physical address, size) -> mapping.
 * NOTE(review): confirm argument meaning against the definition.
 */
extern void *boot_ioremap(unsigned long, unsigned long);
| 59 | |
| 60 | /* Identify CPU proximity domains */ |
| 61 | static void __init parse_cpu_affinity_structure(char *p) |
| 62 | { |
| 63 | struct acpi_table_processor_affinity *cpu_affinity = |
| 64 | (struct acpi_table_processor_affinity *) p; |
| 65 | |
| 66 | if (!cpu_affinity->flags.enabled) |
| 67 | return; /* empty entry */ |
| 68 | |
| 69 | /* mark this node as "seen" in node bitmap */ |
| 70 | BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain); |
| 71 | |
| 72 | printk("CPU 0x%02X in proximity domain 0x%02X\n", |
| 73 | cpu_affinity->apic_id, cpu_affinity->proximity_domain); |
| 74 | } |
| 75 | |
| 76 | /* |
| 77 | * Identify memory proximity domains and hot-remove capabilities. |
| 78 | * Fill node memory chunk list structure. |
| 79 | */ |
| 80 | static void __init parse_memory_affinity_structure (char *sratp) |
| 81 | { |
| 82 | unsigned long long paddr, size; |
| 83 | unsigned long start_pfn, end_pfn; |
| 84 | u8 pxm; |
| 85 | struct node_memory_chunk_s *p, *q, *pend; |
| 86 | struct acpi_table_memory_affinity *memory_affinity = |
| 87 | (struct acpi_table_memory_affinity *) sratp; |
| 88 | |
| 89 | if (!memory_affinity->flags.enabled) |
| 90 | return; /* empty entry */ |
| 91 | |
| 92 | /* mark this node as "seen" in node bitmap */ |
| 93 | BMAP_SET(pxm_bitmap, memory_affinity->proximity_domain); |
| 94 | |
| 95 | /* calculate info for memory chunk structure */ |
| 96 | paddr = memory_affinity->base_addr_hi; |
| 97 | paddr = (paddr << 32) | memory_affinity->base_addr_lo; |
| 98 | size = memory_affinity->length_hi; |
| 99 | size = (size << 32) | memory_affinity->length_lo; |
| 100 | |
| 101 | start_pfn = paddr >> PAGE_SHIFT; |
| 102 | end_pfn = (paddr + size) >> PAGE_SHIFT; |
| 103 | |
| 104 | pxm = memory_affinity->proximity_domain; |
| 105 | |
| 106 | if (num_memory_chunks >= MAXCHUNKS) { |
| 107 | printk("Too many mem chunks in SRAT. Ignoring %lld MBytes at %llx\n", |
| 108 | size/(1024*1024), paddr); |
| 109 | return; |
| 110 | } |
| 111 | |
| 112 | /* Insertion sort based on base address */ |
| 113 | pend = &node_memory_chunk[num_memory_chunks]; |
| 114 | for (p = &node_memory_chunk[0]; p < pend; p++) { |
| 115 | if (start_pfn < p->start_pfn) |
| 116 | break; |
| 117 | } |
| 118 | if (p < pend) { |
| 119 | for (q = pend; q >= p; q--) |
| 120 | *(q + 1) = *q; |
| 121 | } |
| 122 | p->start_pfn = start_pfn; |
| 123 | p->end_pfn = end_pfn; |
| 124 | p->pxm = pxm; |
| 125 | |
| 126 | num_memory_chunks++; |
| 127 | |
| 128 | printk("Memory range 0x%lX to 0x%lX (type 0x%X) in proximity domain 0x%02X %s\n", |
| 129 | start_pfn, end_pfn, |
| 130 | memory_affinity->memory_type, |
| 131 | memory_affinity->proximity_domain, |
| 132 | (memory_affinity->flags.hot_pluggable ? |
| 133 | "enabled and removable" : "enabled" ) ); |
| 134 | } |
| 135 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 136 | /* |
| 137 | * The SRAT table always lists ascending addresses, so can always |
| 138 | * assume that the first "start" address that you see is the real |
| 139 | * start of the node, and that the current "end" address is after |
| 140 | * the previous one. |
| 141 | */ |
| 142 | static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk) |
| 143 | { |
| 144 | /* |
| 145 | * Only add present memory as told by the e820. |
| 146 | * There is no guarantee from the SRAT that the memory it |
| 147 | * enumerates is present at boot time because it represents |
| 148 | * *possible* memory hotplug areas the same as normal RAM. |
| 149 | */ |
| 150 | if (memory_chunk->start_pfn >= max_pfn) { |
| 151 | printk (KERN_INFO "Ignoring SRAT pfns: 0x%08lx -> %08lx\n", |
| 152 | memory_chunk->start_pfn, memory_chunk->end_pfn); |
| 153 | return; |
| 154 | } |
| 155 | if (memory_chunk->nid != nid) |
| 156 | return; |
| 157 | |
| 158 | if (!node_has_online_mem(nid)) |
| 159 | node_start_pfn[nid] = memory_chunk->start_pfn; |
| 160 | |
| 161 | if (node_start_pfn[nid] > memory_chunk->start_pfn) |
| 162 | node_start_pfn[nid] = memory_chunk->start_pfn; |
| 163 | |
| 164 | if (node_end_pfn[nid] < memory_chunk->end_pfn) |
| 165 | node_end_pfn[nid] = memory_chunk->end_pfn; |
| 166 | } |
| 167 | |
| 168 | /* Parse the ACPI Static Resource Affinity Table */ |
| 169 | static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) |
| 170 | { |
| 171 | u8 *start, *end, *p; |
| 172 | int i, j, nid; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 173 | |
| 174 | start = (u8 *)(&(sratp->reserved) + 1); /* skip header */ |
| 175 | p = start; |
| 176 | end = (u8 *)sratp + sratp->header.length; |
| 177 | |
| 178 | memset(pxm_bitmap, 0, sizeof(pxm_bitmap)); /* init proximity domain bitmap */ |
| 179 | memset(node_memory_chunk, 0, sizeof(node_memory_chunk)); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 180 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 181 | num_memory_chunks = 0; |
| 182 | while (p < end) { |
| 183 | switch (*p) { |
| 184 | case ACPI_SRAT_PROCESSOR_AFFINITY: |
| 185 | parse_cpu_affinity_structure(p); |
| 186 | break; |
| 187 | case ACPI_SRAT_MEMORY_AFFINITY: |
| 188 | parse_memory_affinity_structure(p); |
| 189 | break; |
| 190 | default: |
| 191 | printk("ACPI 2.0 SRAT: unknown entry skipped: type=0x%02X, len=%d\n", p[0], p[1]); |
| 192 | break; |
| 193 | } |
| 194 | p += p[1]; |
| 195 | if (p[1] == 0) { |
| 196 | printk("acpi20_parse_srat: Entry length value is zero;" |
| 197 | " can't parse any further!\n"); |
| 198 | break; |
| 199 | } |
| 200 | } |
| 201 | |
| 202 | if (num_memory_chunks == 0) { |
| 203 | printk("could not finy any ACPI SRAT memory areas.\n"); |
| 204 | goto out_fail; |
| 205 | } |
| 206 | |
| 207 | /* Calculate total number of nodes in system from PXM bitmap and create |
| 208 | * a set of sequential node IDs starting at zero. (ACPI doesn't seem |
| 209 | * to specify the range of _PXM values.) |
| 210 | */ |
| 211 | /* |
| 212 | * MCD - we no longer HAVE to number nodes sequentially. PXM domain |
| 213 | * numbers could go as high as 256, and MAX_NUMNODES for i386 is typically |
| 214 | * 32, so we will continue numbering them in this manner until MAX_NUMNODES |
| 215 | * approaches MAX_PXM_DOMAINS for i386. |
| 216 | */ |
| 217 | nodes_clear(node_online_map); |
| 218 | for (i = 0; i < MAX_PXM_DOMAINS; i++) { |
| 219 | if (BMAP_TEST(pxm_bitmap, i)) { |
Yasunori Goto | 762834e | 2006-06-23 02:03:19 -0700 | [diff] [blame] | 220 | int nid = acpi_map_pxm_to_node(i); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 221 | node_set_online(nid); |
| 222 | } |
| 223 | } |
| 224 | BUG_ON(num_online_nodes() == 0); |
| 225 | |
| 226 | /* set cnode id in memory chunk structure */ |
| 227 | for (i = 0; i < num_memory_chunks; i++) |
Yasunori Goto | 762834e | 2006-06-23 02:03:19 -0700 | [diff] [blame] | 228 | node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 229 | |
| 230 | printk("pxm bitmap: "); |
| 231 | for (i = 0; i < sizeof(pxm_bitmap); i++) { |
| 232 | printk("%02X ", pxm_bitmap[i]); |
| 233 | } |
| 234 | printk("\n"); |
| 235 | printk("Number of logical nodes in system = %d\n", num_online_nodes()); |
| 236 | printk("Number of memory chunks in system = %d\n", num_memory_chunks); |
| 237 | |
| 238 | for (j = 0; j < num_memory_chunks; j++){ |
| 239 | struct node_memory_chunk_s * chunk = &node_memory_chunk[j]; |
| 240 | printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", |
| 241 | j, chunk->nid, chunk->start_pfn, chunk->end_pfn); |
| 242 | node_read_chunk(chunk->nid, chunk); |
Mel Gorman | 4cfee88 | 2006-09-27 01:49:51 -0700 | [diff] [blame^] | 243 | add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 244 | } |
| 245 | |
| 246 | for_each_online_node(nid) { |
| 247 | unsigned long start = node_start_pfn[nid]; |
| 248 | unsigned long end = node_end_pfn[nid]; |
| 249 | |
| 250 | memory_present(nid, start, end); |
| 251 | node_remap_size[nid] = node_memmap_size_bytes(nid, start, end); |
| 252 | } |
| 253 | return 1; |
| 254 | out_fail: |
| 255 | return 0; |
| 256 | } |
| 257 | |
| 258 | int __init get_memcfg_from_srat(void) |
| 259 | { |
| 260 | struct acpi_table_header *header = NULL; |
| 261 | struct acpi_table_rsdp *rsdp = NULL; |
| 262 | struct acpi_table_rsdt *rsdt = NULL; |
| 263 | struct acpi_pointer *rsdp_address = NULL; |
| 264 | struct acpi_table_rsdt saved_rsdt; |
| 265 | int tables = 0; |
| 266 | int i = 0; |
| 267 | |
Magnus Damm | 5d35704 | 2005-10-30 14:59:48 -0800 | [diff] [blame] | 268 | if (ACPI_FAILURE(acpi_find_root_pointer(ACPI_PHYSICAL_ADDRESSING, |
| 269 | rsdp_address))) { |
| 270 | printk("%s: System description tables not found\n", |
| 271 | __FUNCTION__); |
| 272 | goto out_err; |
| 273 | } |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 274 | |
| 275 | if (rsdp_address->pointer_type == ACPI_PHYSICAL_POINTER) { |
| 276 | printk("%s: assigning address to rsdp\n", __FUNCTION__); |
| 277 | rsdp = (struct acpi_table_rsdp *) |
| 278 | (u32)rsdp_address->pointer.physical; |
| 279 | } else { |
| 280 | printk("%s: rsdp_address is not a physical pointer\n", __FUNCTION__); |
| 281 | goto out_err; |
| 282 | } |
| 283 | if (!rsdp) { |
| 284 | printk("%s: Didn't find ACPI root!\n", __FUNCTION__); |
| 285 | goto out_err; |
| 286 | } |
| 287 | |
| 288 | printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision, |
| 289 | rsdp->oem_id); |
| 290 | |
| 291 | if (strncmp(rsdp->signature, RSDP_SIG,strlen(RSDP_SIG))) { |
| 292 | printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __FUNCTION__); |
| 293 | goto out_err; |
| 294 | } |
| 295 | |
| 296 | rsdt = (struct acpi_table_rsdt *) |
| 297 | boot_ioremap(rsdp->rsdt_address, sizeof(struct acpi_table_rsdt)); |
| 298 | |
| 299 | if (!rsdt) { |
| 300 | printk(KERN_WARNING |
| 301 | "%s: ACPI: Invalid root system description tables (RSDT)\n", |
| 302 | __FUNCTION__); |
| 303 | goto out_err; |
| 304 | } |
| 305 | |
| 306 | header = & rsdt->header; |
| 307 | |
| 308 | if (strncmp(header->signature, RSDT_SIG, strlen(RSDT_SIG))) { |
| 309 | printk(KERN_WARNING "ACPI: RSDT signature incorrect\n"); |
| 310 | goto out_err; |
| 311 | } |
| 312 | |
| 313 | /* |
| 314 | * The number of tables is computed by taking the |
| 315 | * size of all entries (header size minus total |
| 316 | * size of RSDT) divided by the size of each entry |
| 317 | * (4-byte table pointers). |
| 318 | */ |
| 319 | tables = (header->length - sizeof(struct acpi_table_header)) / 4; |
| 320 | |
| 321 | if (!tables) |
| 322 | goto out_err; |
| 323 | |
| 324 | memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt)); |
| 325 | |
| 326 | if (saved_rsdt.header.length > sizeof(saved_rsdt)) { |
| 327 | printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n", |
| 328 | saved_rsdt.header.length); |
| 329 | goto out_err; |
| 330 | } |
| 331 | |
| 332 | printk("Begin SRAT table scan....\n"); |
| 333 | |
| 334 | for (i = 0; i < tables; i++) { |
| 335 | /* Map in header, then map in full table length. */ |
| 336 | header = (struct acpi_table_header *) |
| 337 | boot_ioremap(saved_rsdt.entry[i], sizeof(struct acpi_table_header)); |
| 338 | if (!header) |
| 339 | break; |
| 340 | header = (struct acpi_table_header *) |
| 341 | boot_ioremap(saved_rsdt.entry[i], header->length); |
| 342 | if (!header) |
| 343 | break; |
| 344 | |
| 345 | if (strncmp((char *) &header->signature, "SRAT", 4)) |
| 346 | continue; |
| 347 | |
| 348 | /* we've found the srat table. don't need to look at any more tables */ |
| 349 | return acpi20_parse_srat((struct acpi_table_srat *)header); |
| 350 | } |
| 351 | out_err: |
Mel Gorman | 4cfee88 | 2006-09-27 01:49:51 -0700 | [diff] [blame^] | 352 | remove_all_active_ranges(); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 353 | printk("failed to get NUMA memory information from SRAT table\n"); |
| 354 | return 0; |
| 355 | } |