/*
 * Copyright (c) 2000, 2003 Silicon Graphics, Inc.  All rights reserved.
 * Copyright (c) 2001 Intel Corp.
 * Copyright (c) 2001 Tony Luck <tony.luck@intel.com>
 * Copyright (c) 2002 NEC Corp.
 * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
 * Copyright (c) 2004 Silicon Graphics, Inc
 *	Russ Anderson <rja@sgi.com>
 *	Jesse Barnes <jbarnes@sgi.com>
 *	Jack Steiner <steiner@sgi.com>
 */

/*
 * Platform initialization for Discontig Memory
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/nmi.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/nodemask.h>
#include <linux/slab.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/meminit.h>
#include <asm/numa.h>
#include <asm/sections.h>

/*
 * Track per-node information needed to setup the boot memory allocator, the
 * per-node areas, and the real VM.
 */
struct early_node_data {
	struct ia64_node_data *node_data;
	unsigned long pernode_addr;
	unsigned long pernode_size;
	unsigned long num_physpages;
#ifdef CONFIG_ZONE_DMA
	unsigned long num_dma_physpages;
#endif
	unsigned long min_pfn;
	unsigned long max_pfn;
};

static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
static nodemask_t memory_less_mask __initdata;

pg_data_t *pgdat_list[MAX_NUMNODES];

/*
 * To prevent cache aliasing effects, align per-node structures so that they
 * start at addresses that are strided by node number.
 */
#define MAX_NODE_ALIGN_OFFSET	(32 * 1024 * 1024)
#define NODEDATA_ALIGN(addr, node)						\
	((((addr) + 1024*1024-1) & ~(1024*1024-1)) + 				\
	     (((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1)))
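
/*
 * Illustrative worked example, not upstream code (assumes a 64KB
 * PERCPU_PAGE_SIZE, which is config-dependent): NODEDATA_ALIGN(0x4000100, 2)
 * first rounds the address up to a 1MB boundary, then adds the node-strided
 * offset:
 *
 *	((0x4000100 + 0xfffff) & ~0xfffff)		= 0x4100000
 *	(2 * 0x10000) & (MAX_NODE_ALIGN_OFFSET - 1)	= 0x20000
 *	result						= 0x4120000
 *
 * so each node's pernode area lands at a different offset within the 32MB
 * stride window, giving the per-node structures distinct cache colors.
 */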

/**
 * build_node_maps - callback to setup bootmem structs for each node
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * We allocate a struct bootmem_data for each piece of memory that we wish to
 * treat as a virtually contiguous block (i.e. each node). Each such block
 * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down
 * if necessary.  Any non-existent pages will simply be part of the virtual
 * memmap.  We also update min_low_pfn and max_low_pfn here as we receive
 * memory ranges from the caller.
 */
static int __init build_node_maps(unsigned long start, unsigned long len,
				  int node)
{
	unsigned long spfn, epfn, end = start + len;
	struct bootmem_data *bdp = &bootmem_node_data[node];

	epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
	spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT;

	if (!bdp->node_low_pfn) {
		bdp->node_min_pfn = spfn;
		bdp->node_low_pfn = epfn;
	} else {
		bdp->node_min_pfn = min(spfn, bdp->node_min_pfn);
		bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
	}

	return 0;
}
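
/*
 * Worked example of the granule rounding above (illustrative only; assumes
 * 16KB pages and a 16MB IA64_GRANULE_SIZE, both of which are config
 * options): for start = 0x1234000 and len = 0x100000,
 *
 *	spfn = GRANULEROUNDDOWN(0x1234000) >> 14 = 0x1000000 >> 14 = 0x400
 *	epfn = GRANULEROUNDUP(0x1334000)   >> 14 = 0x2000000 >> 14 = 0x800
 *
 * i.e. the node's pfn range is widened to whole granules, and the pages in
 * that range that don't actually exist are covered by the virtual memmap.
 */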

/**
 * early_nr_cpus_node - return number of cpus on a given node
 * @node: node to check
 *
 * Count the number of cpus on @node.  We can't use nr_cpus_node() yet because
 * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
 * called yet.  Note that node 0 will also count all non-existent cpus.
 */
static int __meminit early_nr_cpus_node(int node)
{
	int cpu, n = 0;

	for_each_possible_early_cpu(cpu)
		if (node == node_cpuid[cpu].nid)
			n++;

	return n;
}

/**
 * compute_pernodesize - compute size of pernode data
 * @node: the node id.
 */
static unsigned long __meminit compute_pernodesize(int node)
{
	unsigned long pernodesize = 0, cpus;

	cpus = early_nr_cpus_node(node);
	pernodesize += PERCPU_PAGE_SIZE * cpus;
	pernodesize += node * L1_CACHE_BYTES;
	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
	pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
	pernodesize = PAGE_ALIGN(pernodesize);
	return pernodesize;
}
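
/*
 * Sizing sketch (illustrative, not upstream code): for a hypothetical node 1
 * with two early cpus, and assuming a 64KB PERCPU_PAGE_SIZE with 128-byte
 * cache lines (both config-dependent), the sum above works out as
 *
 *	2 * 0x10000				percpu areas
 *	+ 1 * 128				node-coloring pad
 *	+ 2 * L1_CACHE_ALIGN(sizeof(pg_data_t))
 *	+ L1_CACHE_ALIGN(sizeof(struct ia64_node_data))
 *
 * rounded up to a page.  The terms appear in the same order in which
 * fill_pernode() later carves the region up.
 */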

/**
 * per_cpu_node_setup - setup per-cpu areas on each node
 * @cpu_data: per-cpu area on this node
 * @node: node to setup
 *
 * Copy the static per-cpu data into the region we just set aside and then
 * setup __per_cpu_offset for each CPU on this node.  Return a pointer to
 * the end of the area.
 */
static void *per_cpu_node_setup(void *cpu_data, int node)
{
#ifdef CONFIG_SMP
	int cpu;

	for_each_possible_early_cpu(cpu) {
		void *src = cpu == 0 ? __cpu0_per_cpu : __phys_per_cpu_start;

		if (node != node_cpuid[cpu].nid)
			continue;

		memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start);
		__per_cpu_offset[cpu] = (char *)__va(cpu_data) -
			__per_cpu_start;

		/*
		 * percpu area for cpu0 is moved from the __init area
		 * which is setup by head.S and used till this point.
		 * Update ar.k3.  This move ensures that the percpu
		 * area for cpu0 is on the correct node and that its
		 * virtual address isn't insanely far from the other
		 * percpu areas, which is important for the congruent
		 * percpu allocator.
		 */
		if (cpu == 0)
			ia64_set_kr(IA64_KR_PER_CPU_DATA,
				    (unsigned long)cpu_data -
				    (unsigned long)__per_cpu_start);

		cpu_data += PERCPU_PAGE_SIZE;
	}
#endif
	return cpu_data;
}
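
/*
 * Illustration (not upstream code): after the loop above, a percpu variable
 * is reached by adding the cpu's offset to its link-time address, which is
 * what the per_cpu() accessor boils down to:
 *
 *	&var + __per_cpu_offset[cpu]
 *	  == __va(that cpu's cpu_data) + (offset of var within the percpu
 *	     section)
 *
 * so placing cpu_data on the cpu's home node makes every per_cpu()
 * dereference node-local.
 */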

#ifdef CONFIG_SMP
/**
 * setup_per_cpu_areas - setup percpu areas
 *
 * Arch code has already allocated and initialized percpu areas.  All
 * this function has to do is to teach the determined layout to the
 * dynamic percpu allocator, which happens to be more complex than
 * creating whole new ones using helpers.
 */
void __init setup_per_cpu_areas(void)
{
	struct pcpu_alloc_info *ai;
	struct pcpu_group_info *uninitialized_var(gi);
	unsigned int *cpu_map;
	void *base;
	unsigned long base_offset;
	unsigned int cpu;
	ssize_t static_size, reserved_size, dyn_size;
	int node, prev_node, unit, nr_units, rc;

	ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids);
	if (!ai)
		panic("failed to allocate pcpu_alloc_info");
	cpu_map = ai->groups[0].cpu_map;

	/* determine base */
	base = (void *)ULONG_MAX;
	for_each_possible_cpu(cpu)
		base = min(base,
			   (void *)(__per_cpu_offset[cpu] + __per_cpu_start));
	base_offset = (void *)__per_cpu_start - base;

	/* build cpu_map, units are grouped by node */
	unit = 0;
	for_each_node(node)
		for_each_possible_cpu(cpu)
			if (node == node_cpuid[cpu].nid)
				cpu_map[unit++] = cpu;
	nr_units = unit;

	/* set basic parameters */
	static_size = __per_cpu_end - __per_cpu_start;
	reserved_size = PERCPU_MODULE_RESERVE;
	dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
	if (dyn_size < 0)
		panic("percpu area overflow static=%zd reserved=%zd\n",
		      static_size, reserved_size);

	ai->static_size		= static_size;
	ai->reserved_size	= reserved_size;
	ai->dyn_size		= dyn_size;
	ai->unit_size		= PERCPU_PAGE_SIZE;
	ai->atom_size		= PAGE_SIZE;
	ai->alloc_size		= PERCPU_PAGE_SIZE;

	/*
	 * CPUs are put into groups according to node.  Walk cpu_map
	 * and create new groups at node boundaries.
	 */
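	/*
	 * Example (illustrative, not upstream): with four possible cpus
	 * where cpus 0 and 2 sit on node 0 and cpus 1 and 3 on node 1,
	 * the node-ordered cpu_map built above is {0, 2, 1, 3}, and the
	 * loop below emits two groups: {0, 2} based at node 0's percpu
	 * base and {1, 3} based at node 1's.
	 */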
	prev_node = -1;
	ai->nr_groups = 0;
	for (unit = 0; unit < nr_units; unit++) {
		cpu = cpu_map[unit];
		node = node_cpuid[cpu].nid;

		if (node == prev_node) {
			gi->nr_units++;
			continue;
		}
		prev_node = node;

		gi = &ai->groups[ai->nr_groups++];
		gi->nr_units		= 1;
		gi->base_offset		= __per_cpu_offset[cpu] + base_offset;
		gi->cpu_map		= &cpu_map[unit];
	}

	rc = pcpu_setup_first_chunk(ai, base);
	if (rc)
		panic("failed to setup percpu area (err=%d)", rc);

	pcpu_free_alloc_info(ai);
}
#endif

/**
 * fill_pernode - initialize pernode data.
 * @node: the node id.
 * @pernode: physical address of pernode data
 * @pernodesize: size of the pernode data
 */
static void __init fill_pernode(int node, unsigned long pernode,
	unsigned long pernodesize)
{
	void *cpu_data;
	int cpus = early_nr_cpus_node(node);
	struct bootmem_data *bdp = &bootmem_node_data[node];

	mem_data[node].pernode_addr = pernode;
	mem_data[node].pernode_size = pernodesize;
	memset(__va(pernode), 0, pernodesize);

	cpu_data = (void *)pernode;
	pernode += PERCPU_PAGE_SIZE * cpus;
	pernode += node * L1_CACHE_BYTES;

	pgdat_list[node] = __va(pernode);
	pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

	mem_data[node].node_data = __va(pernode);
	pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));

	pgdat_list[node]->bdata = bdp;
	pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

	cpu_data = per_cpu_node_setup(cpu_data, node);

	return;
}

/**
 * find_pernode_space - allocate memory for memory map and per-node structures
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * This routine reserves space for the per-cpu data struct, the list of
 * pg_data_ts and the per-node data struct.  Each node will have something like
 * the following in the first chunk of addr. space large enough to hold it.
 *
 *    ________________________
 *   |                        |
 *   |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first
 *   |    PERCPU_PAGE_SIZE *  |     start and length big enough
 *   |    cpus_on_this_node   | Node 0 will also have entries for all non-existent cpus.
 *   |------------------------|
 *   |   local pg_data_t *    |
 *   |------------------------|
 *   |  local ia64_node_data  |
 *   |------------------------|
 *   |          ???           |
 *   |________________________|
 *
 * Once this space has been set aside, the bootmem maps are initialized.  We
 * could probably move the allocation of the per-cpu and ia64_node_data space
 * outside of this function and use alloc_bootmem_node(), but doing it here
 * is straightforward and we get the alignments we want so...
 */
static int __init find_pernode_space(unsigned long start, unsigned long len,
				     int node)
{
	unsigned long spfn, epfn;
	unsigned long pernodesize = 0, pernode, pages, mapsize;
	struct bootmem_data *bdp = &bootmem_node_data[node];

	spfn = start >> PAGE_SHIFT;
	epfn = (start + len) >> PAGE_SHIFT;

	pages = bdp->node_low_pfn - bdp->node_min_pfn;
	mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;

	/*
	 * Make sure this memory falls within this node's usable memory
	 * since we may have thrown some away in build_maps().
	 */
	if (spfn < bdp->node_min_pfn || epfn > bdp->node_low_pfn)
		return 0;

	/* Don't setup this node's local space twice... */
	if (mem_data[node].pernode_addr)
		return 0;

	/*
	 * Calculate total size needed, incl. what's necessary
	 * for good alignment and alias prevention.
	 */
	pernodesize = compute_pernodesize(node);
	pernode = NODEDATA_ALIGN(start, node);

	/* Is this range big enough for what we want to store here? */
	if (start + len > (pernode + pernodesize + mapsize))
		fill_pernode(node, pernode, pernodesize);

	return 0;
}

/**
 * free_node_bootmem - free bootmem allocator memory for use
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * Simply calls the bootmem allocator to free the specified range from
 * the given pg_data_t's bdata struct.  After this function has been called
 * for all the entries in the EFI memory map, the bootmem allocator will
 * be ready to service allocation requests.
 */
static int __init free_node_bootmem(unsigned long start, unsigned long len,
				    int node)
{
	free_bootmem_node(pgdat_list[node], start, len);

	return 0;
}

/**
 * reserve_pernode_space - reserve memory for per-node space
 *
 * Reserve the space used by the bootmem maps & per-node space in the boot
 * allocator so that when we actually create the real mem maps we don't
 * use their memory.
 */
static void __init reserve_pernode_space(void)
{
	unsigned long base, size, pages;
	struct bootmem_data *bdp;
	int node;

	for_each_online_node(node) {
		pg_data_t *pdp = pgdat_list[node];

		if (node_isset(node, memory_less_mask))
			continue;

		bdp = pdp->bdata;

		/* First the bootmem_map itself */
		pages = bdp->node_low_pfn - bdp->node_min_pfn;
		size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
		base = __pa(bdp->node_bootmem_map);
		reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);

		/* Now the per-node space */
		size = mem_data[node].pernode_size;
		base = __pa(mem_data[node].pernode_addr);
		reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
	}
}

static void __meminit scatter_node_data(void)
{
	pg_data_t **dst;
	int node;

	/*
	 * for_each_online_node() can't be used here.
	 * node_online_map is not set for hot-added nodes at this time,
	 * because we are halfway through initialization of the new node's
	 * structures.  If for_each_online_node() is used, a new node's
	 * pg_data_ptrs will not be initialized.  Instead,
	 * pgdat_list[] is checked.
	 */
	for_each_node(node) {
		if (pgdat_list[node]) {
			dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs;
			memcpy(dst, pgdat_list, sizeof(pgdat_list));
		}
	}
}

/**
 * initialize_pernode_data - fixup per-cpu & per-node pointers
 *
 * Each node's per-node area has a copy of the global pg_data_t list, so
 * we copy that to each node here, as well as setting the per-cpu pointer
 * to the local node data structure.  The active_cpus field of the per-node
 * structure gets setup by the platform_cpu_init() function later.
 */
static void __init initialize_pernode_data(void)
{
	int cpu, node;

	scatter_node_data();

#ifdef CONFIG_SMP
	/* Set the node_data pointer for each per-cpu struct */
	for_each_possible_early_cpu(cpu) {
		node = node_cpuid[cpu].nid;
		per_cpu(ia64_cpu_info, cpu).node_data =
			mem_data[node].node_data;
	}
#else
	{
		struct cpuinfo_ia64 *cpu0_cpu_info;
		cpu = 0;
		node = node_cpuid[cpu].nid;
		cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start +
			((char *)&ia64_cpu_info - __per_cpu_start));
		cpu0_cpu_info->node_data = mem_data[node].node_data;
	}
#endif /* CONFIG_SMP */
}

/**
 * memory_less_node_alloc - attempt to allocate memory on the best NUMA SLIT
 *	node, but fall back to any other online node when
 *	__alloc_bootmem_node() fails for the best node.
 * @nid: node id
 * @pernodesize: size of this node's pernode data
 */
static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
{
	void *ptr = NULL;
	u8 best = 0xff;
	int bestnode = -1, node, anynode = 0;

	for_each_online_node(node) {
		if (node_isset(node, memory_less_mask))
			continue;
		else if (node_distance(nid, node) < best) {
			best = node_distance(nid, node);
			bestnode = node;
		}
		anynode = node;
	}

	if (bestnode == -1)
		bestnode = anynode;

	ptr = __alloc_bootmem_node(pgdat_list[bestnode], pernodesize,
		PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));

	return ptr;
}

/**
 * memory_less_nodes - allocate and initialize pernode information for
 *	CPU-only (memoryless) nodes.
 */
static void __init memory_less_nodes(void)
{
	unsigned long pernodesize;
	void *pernode;
	int node;

	for_each_node_mask(node, memory_less_mask) {
		pernodesize = compute_pernodesize(node);
		pernode = memory_less_node_alloc(node, pernodesize);
		fill_pernode(node, __pa(pernode), pernodesize);
	}

	return;
}

/**
 * find_memory - walk the EFI memory map and setup the bootmem allocator
 *
 * Called early in boot to setup the bootmem allocator, and to
 * allocate the per-cpu and per-node structures.
 */
void __init find_memory(void)
{
	int node;

	reserve_memory();

	if (num_online_nodes() == 0) {
		printk(KERN_ERR "node info missing!\n");
		node_set_online(0);
	}

	nodes_or(memory_less_mask, memory_less_mask, node_online_map);
	min_low_pfn = -1;
	max_low_pfn = 0;

	/* These actually end up getting called by call_pernode_memory() */
	efi_memmap_walk(filter_rsvd_memory, build_node_maps);
	efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
	efi_memmap_walk(find_max_min_low_pfn, NULL);

	for_each_online_node(node)
		if (bootmem_node_data[node].node_low_pfn) {
			node_clear(node, memory_less_mask);
			mem_data[node].min_pfn = ~0UL;
		}

	efi_memmap_walk(filter_memory, register_active_ranges);

	/*
	 * Initialize the boot memory maps in reverse order since that's
	 * what the bootmem allocator expects
	 */
	for (node = MAX_NUMNODES - 1; node >= 0; node--) {
		unsigned long pernode, pernodesize, map;
		struct bootmem_data *bdp;

		if (!node_online(node))
			continue;
		else if (node_isset(node, memory_less_mask))
			continue;

		bdp = &bootmem_node_data[node];
		pernode = mem_data[node].pernode_addr;
		pernodesize = mem_data[node].pernode_size;
		map = pernode + pernodesize;

		init_bootmem_node(pgdat_list[node],
				  map>>PAGE_SHIFT,
				  bdp->node_min_pfn,
				  bdp->node_low_pfn);
	}

	efi_memmap_walk(filter_rsvd_memory, free_node_bootmem);

	reserve_pernode_space();
	memory_less_nodes();
	initialize_pernode_data();

	max_pfn = max_low_pfn;

	find_initrd();
}

#ifdef CONFIG_SMP
/**
 * per_cpu_init - setup per-cpu variables
 *
 * find_pernode_space() does most of this already; we just need to set
 * local_per_cpu_offset
 */
void __cpuinit *per_cpu_init(void)
{
	int cpu;
	static int first_time = 1;

	if (first_time) {
		first_time = 0;
		for_each_possible_early_cpu(cpu)
			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
	}

	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
}
#endif /* CONFIG_SMP */

/**
 * show_mem - give short summary of memory stats
 *
 * Shows a simple page count of reserved and used pages in the system.
 * For discontig machines, it does this on a per-pgdat basis.
 */
void show_mem(unsigned int filter)
{
	int i, total_reserved = 0;
	int total_shared = 0, total_cached = 0;
	unsigned long total_present = 0;
	pg_data_t *pgdat;

	printk(KERN_INFO "Mem-info:\n");
	show_free_areas(filter);
	printk(KERN_INFO "Node memory in pages:\n");
	for_each_online_pgdat(pgdat) {
		unsigned long present;
		unsigned long flags;
		int shared = 0, cached = 0, reserved = 0;
		int nid = pgdat->node_id;

		if (skip_free_areas_node(filter, nid))
			continue;
		pgdat_resize_lock(pgdat, &flags);
		present = pgdat->node_present_pages;
		for(i = 0; i < pgdat->node_spanned_pages; i++) {
			struct page *page;
			if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
				touch_nmi_watchdog();
			if (pfn_valid(pgdat->node_start_pfn + i))
				page = pfn_to_page(pgdat->node_start_pfn + i);
			else {
				i = vmemmap_find_next_valid_pfn(nid, i) - 1;
				continue;
			}
			if (PageReserved(page))
				reserved++;
			else if (PageSwapCache(page))
				cached++;
			else if (page_count(page))
				shared += page_count(page)-1;
		}
		pgdat_resize_unlock(pgdat, &flags);
		total_present += present;
		total_reserved += reserved;
		total_cached += cached;
		total_shared += shared;
		printk(KERN_INFO "Node %4d:  RAM: %11ld, rsvd: %8d, "
		       "shrd: %10d, swpd: %10d\n", nid,
		       present, reserved, shared, cached);
	}
	printk(KERN_INFO "%ld pages of RAM\n", total_present);
	printk(KERN_INFO "%d reserved pages\n", total_reserved);
	printk(KERN_INFO "%d pages shared\n", total_shared);
	printk(KERN_INFO "%d pages swap cached\n", total_cached);
	printk(KERN_INFO "Total of %ld pages in page table cache\n",
	       quicklist_total_size());
	printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages());
}

/**
 * call_pernode_memory - use SRAT to call callback functions with node info
 * @start: physical start of range
 * @len: length of range
 * @arg: function to call for each range
 *
 * efi_memmap_walk() knows nothing about layout of memory across nodes. Find
 * out to which node a block of memory belongs.  Ignore memory that we cannot
 * identify, and split blocks that run across multiple nodes.
 *
 * Take this opportunity to round the start address up and the end address
 * down to page boundaries.
 */
void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
{
	unsigned long rs, re, end = start + len;
	void (*func)(unsigned long, unsigned long, int);
	int i;

	start = PAGE_ALIGN(start);
	end &= PAGE_MASK;
	if (start >= end)
		return;

	func = arg;

	if (!num_node_memblks) {
		/* No SRAT table, so assume one node (node 0) */
		if (start < end)
			(*func)(start, end - start, 0);
		return;
	}

	for (i = 0; i < num_node_memblks; i++) {
		rs = max(start, node_memblk[i].start_paddr);
		re = min(end, node_memblk[i].start_paddr +
			 node_memblk[i].size);

		if (rs < re)
			(*func)(rs, re - rs, node_memblk[i].nid);

		if (re == end)
			break;
	}
}
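
/*
 * Worked example (illustrative only): if node_memblk[] records
 * [0x0000000, 0x3000000) on node 0 and [0x3000000, 0x8000000) on node 1,
 * then call_pernode_memory(0x2f00000, 0x200000, func) is split into two
 * callbacks:
 *
 *	func(0x2f00000, 0x100000, 0);
 *	func(0x3000000, 0x100000, 1);
 *
 * Memory not covered by any memblk entry is silently ignored.
 */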

/**
 * count_node_pages - callback to build per-node memory info structures
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * Each node has its own number of physical pages, DMAable pages, start, and
 * end page frame number.  This routine will be called by call_pernode_memory()
 * for each piece of usable memory and will setup these values for each node.
 * Very similar to build_maps().
 */
static __init int count_node_pages(unsigned long start, unsigned long len, int node)
{
	unsigned long end = start + len;

	mem_data[node].num_physpages += len >> PAGE_SHIFT;
#ifdef CONFIG_ZONE_DMA
	if (start <= __pa(MAX_DMA_ADDRESS))
		mem_data[node].num_dma_physpages +=
			(min(end, __pa(MAX_DMA_ADDRESS)) - start) >>PAGE_SHIFT;
#endif
	start = GRANULEROUNDDOWN(start);
	end = GRANULEROUNDUP(end);
	mem_data[node].max_pfn = max(mem_data[node].max_pfn,
				     end >> PAGE_SHIFT);
	mem_data[node].min_pfn = min(mem_data[node].min_pfn,
				     start >> PAGE_SHIFT);

	return 0;
}
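
/*
 * Illustrative example (assuming ia64's 4GB DMA limit, i.e.
 * __pa(MAX_DMA_ADDRESS) == 0x100000000): a usable range of
 * [0xc0000000, 0x140000000) on node 2 adds
 *
 *	0x80000000 >> PAGE_SHIFT	to mem_data[2].num_physpages
 *	0x40000000 >> PAGE_SHIFT	to mem_data[2].num_dma_physpages
 *
 * since only the first 1GB of the range lies below the DMA limit, while
 * min_pfn/max_pfn are widened to granule-rounded bounds.
 */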

/**
 * paging_init - setup page tables
 *
 * paging_init() sets up the page tables for each node of the system and frees
 * the bootmem allocator memory for general use.
 */
void __init paging_init(void)
{
	unsigned long max_dma;
	unsigned long pfn_offset = 0;
	unsigned long max_pfn = 0;
	int node;
	unsigned long max_zone_pfns[MAX_NR_ZONES];

	max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;

	efi_memmap_walk(filter_rsvd_memory, count_node_pages);

	sparse_memory_present_with_active_regions(MAX_NUMNODES);
	sparse_init();

#ifdef CONFIG_VIRTUAL_MEM_MAP
	VMALLOC_END -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
		sizeof(struct page));
	vmem_map = (struct page *) VMALLOC_END;
	efi_memmap_walk(create_mem_map_page_table, NULL);
	printk("Virtual mem_map starts at 0x%p\n", vmem_map);
#endif

	for_each_online_node(node) {
		num_physpages += mem_data[node].num_physpages;
		pfn_offset = mem_data[node].min_pfn;

#ifdef CONFIG_VIRTUAL_MEM_MAP
		NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
#endif
		if (mem_data[node].max_pfn > max_pfn)
			max_pfn = mem_data[node].max_pfn;
	}

	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
#ifdef CONFIG_ZONE_DMA
	max_zone_pfns[ZONE_DMA] = max_dma;
#endif
	max_zone_pfns[ZONE_NORMAL] = max_pfn;
	free_area_init_nodes(max_zone_pfns);

	zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}

#ifdef CONFIG_MEMORY_HOTPLUG
pg_data_t *arch_alloc_nodedata(int nid)
{
	unsigned long size = compute_pernodesize(nid);

	return kzalloc(size, GFP_KERNEL);
}

void arch_free_nodedata(pg_data_t *pgdat)
{
	kfree(pgdat);
}

void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
{
	pgdat_list[update_node] = update_pgdat;
	scatter_node_data();
}
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit vmemmap_populate(struct page *start_page,
						unsigned long size, int node)
{
	return vmemmap_populate_basepages(start_page, size, node);
}
#endif