/*
 *  linux/mm/vmalloc.c
 *
 *  Copyright (C) 1993  Linus Torvalds
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
 *  Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
 *  Numa awareness, Christoph Lameter, SGI, June 2005
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
#include <linux/debugobjects.h>
#include <linux/vmalloc.h>
#include <linux/kallsyms.h>

#include <asm/uaccess.h>
#include <asm/tlbflush.h>


DEFINE_RWLOCK(vmlist_lock);
struct vm_struct *vmlist;

static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
			    int node, void *caller);

static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
						unsigned long end)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		vunmap_pte_range(pmd, addr, next);
	} while (pmd++, addr = next, addr != end);
}

static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
						unsigned long end)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		vunmap_pmd_range(pud, addr, next);
	} while (pud++, addr = next, addr != end);
}

void unmap_kernel_range(unsigned long addr, unsigned long size)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long start = addr;
	unsigned long end = addr + size;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	flush_cache_vunmap(addr, end);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		vunmap_pud_range(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
	flush_tlb_kernel_range(start, end);
}

static void unmap_vm_area(struct vm_struct *area)
{
	unmap_kernel_range((unsigned long)area->addr, area->size);
}
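
/*
 * Usage sketch (illustrative only, not part of this file): unmap_kernel_range()
 * tears down a kernel mapping previously established over a reserved region,
 * e.g. from an architecture's ioremap teardown path:
 *
 *	// addr/size are assumed to describe an existing kernel mapping
 *	unmap_kernel_range((unsigned long)addr, size);
 *
 * The caller remains responsible for freeing the underlying pages and the
 * struct vm_struct that describes the region.
 */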

static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
			unsigned long end, pgprot_t prot, struct page ***pages)
{
	pte_t *pte;

	pte = pte_alloc_kernel(pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		struct page *page = **pages;
		WARN_ON(!pte_none(*pte));
		if (!page)
			return -ENOMEM;
		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
		(*pages)++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	return 0;
}

static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
			unsigned long end, pgprot_t prot, struct page ***pages)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_alloc(&init_mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
	do {
		next = pmd_addr_end(addr, end);
		if (vmap_pte_range(pmd, addr, next, prot, pages))
			return -ENOMEM;
	} while (pmd++, addr = next, addr != end);
	return 0;
}

static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
			unsigned long end, pgprot_t prot, struct page ***pages)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_alloc(&init_mm, pgd, addr);
	if (!pud)
		return -ENOMEM;
	do {
		next = pud_addr_end(addr, end);
		if (vmap_pmd_range(pud, addr, next, prot, pages))
			return -ENOMEM;
	} while (pud++, addr = next, addr != end);
	return 0;
}

int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long addr = (unsigned long) area->addr;
	unsigned long end = addr + area->size - PAGE_SIZE;
	int err;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		err = vmap_pud_range(pgd, addr, next, prot, pages);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);
	flush_cache_vmap((unsigned long) area->addr, end);
	return err;
}
EXPORT_SYMBOL_GPL(map_vm_area);
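
/*
 * Usage sketch (illustrative only; vmap() below is the canonical in-tree
 * caller): map an existing page array into a freshly reserved area.  Note
 * that map_vm_area() advances the caller's page cursor as it goes:
 *
 *	struct vm_struct *area = get_vm_area(nr << PAGE_SHIFT, VM_MAP);
 *	struct page **cursor = pages;		// 'pages' holds nr entries
 *
 *	if (area && map_vm_area(area, PAGE_KERNEL, &cursor))
 *		vunmap(area->addr);		// tear down on failure
 */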

/*
 * Map a vmalloc()-space virtual address to the physical page.
 */
struct page *vmalloc_to_page(const void *vmalloc_addr)
{
	unsigned long addr = (unsigned long) vmalloc_addr;
	struct page *page = NULL;
	pgd_t *pgd = pgd_offset_k(addr);
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;

	/*
	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
	 * architectures that do not vmalloc module space
	 */
	VIRTUAL_BUG_ON(!is_vmalloc_addr(vmalloc_addr) &&
			!is_module_address(addr));

	if (!pgd_none(*pgd)) {
		pud = pud_offset(pgd, addr);
		if (!pud_none(*pud)) {
			pmd = pmd_offset(pud, addr);
			if (!pmd_none(*pmd)) {
				ptep = pte_offset_map(pmd, addr);
				pte = *ptep;
				if (pte_present(pte))
					page = pte_page(pte);
				pte_unmap(ptep);
			}
		}
	}
	return page;
}
EXPORT_SYMBOL(vmalloc_to_page);

/*
 * Map a vmalloc()-space virtual address to the physical page frame number.
 */
unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
{
	return page_to_pfn(vmalloc_to_page(vmalloc_addr));
}
EXPORT_SYMBOL(vmalloc_to_pfn);
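
/*
 * Usage sketch (illustrative only): walk the pages backing a vmalloc()
 * buffer, e.g. to build per-page descriptors for scatter-gather I/O:
 *
 *	void *buf = vmalloc(len);
 *	unsigned long off;
 *
 *	for (off = 0; buf && off < len; off += PAGE_SIZE) {
 *		struct page *page = vmalloc_to_page(buf + off);
 *		// 'page' is the physical page backing this PAGE_SIZE chunk
 *	}
 */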

static struct vm_struct *
__get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start,
		unsigned long end, int node, gfp_t gfp_mask, void *caller)
{
	struct vm_struct **p, *tmp, *area;
	unsigned long align = 1;
	unsigned long addr;

	BUG_ON(in_interrupt());
	if (flags & VM_IOREMAP) {
		int bit = fls(size);

		if (bit > IOREMAP_MAX_ORDER)
			bit = IOREMAP_MAX_ORDER;
		else if (bit < PAGE_SHIFT)
			bit = PAGE_SHIFT;

		align = 1ul << bit;
	}
	addr = ALIGN(start, align);
	size = PAGE_ALIGN(size);
	if (unlikely(!size))
		return NULL;

	area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);

	if (unlikely(!area))
		return NULL;

	/*
	 * We always allocate a guard page.
	 */
	size += PAGE_SIZE;

	write_lock(&vmlist_lock);
	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
		if ((unsigned long)tmp->addr < addr) {
			if ((unsigned long)tmp->addr + tmp->size >= addr)
				addr = ALIGN(tmp->size +
					     (unsigned long)tmp->addr, align);
			continue;
		}
		if ((size + addr) < addr)
			goto out;
		if (size + addr <= (unsigned long)tmp->addr)
			goto found;
		addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
		if (addr > end - size)
			goto out;
	}
	if ((size + addr) < addr)
		goto out;
	if (addr > end - size)
		goto out;

found:
	area->next = *p;
	*p = area;

	area->flags = flags;
	area->addr = (void *)addr;
	area->size = size;
	area->pages = NULL;
	area->nr_pages = 0;
	area->phys_addr = 0;
	area->caller = caller;
	write_unlock(&vmlist_lock);

	return area;

out:
	write_unlock(&vmlist_lock);
	kfree(area);
	if (printk_ratelimit())
		printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n");
	return NULL;
}

struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
				unsigned long start, unsigned long end)
{
	return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL,
						__builtin_return_address(0));
}
EXPORT_SYMBOL_GPL(__get_vm_area);

/**
 *	get_vm_area  -  reserve a contiguous kernel virtual area
 *	@size:		size of the area
 *	@flags:		%VM_IOREMAP for I/O mappings or VM_ALLOC
 *
 *	Search an area of @size in the kernel virtual mapping area,
 *	and reserve it for our purposes.  Returns the area descriptor
 *	on success or %NULL on failure.
 */
struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
{
	return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END,
				-1, GFP_KERNEL, __builtin_return_address(0));
}

struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
				void *caller)
{
	return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END,
						-1, GFP_KERNEL, caller);
}

struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
				   int node, gfp_t gfp_mask)
{
	return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node,
				  gfp_mask, __builtin_return_address(0));
}

/* Caller must hold vmlist_lock */
static struct vm_struct *__find_vm_area(const void *addr)
{
	struct vm_struct *tmp;

	for (tmp = vmlist; tmp != NULL; tmp = tmp->next) {
		if (tmp->addr == addr)
			break;
	}

	return tmp;
}

/* Caller must hold vmlist_lock */
static struct vm_struct *__remove_vm_area(const void *addr)
{
	struct vm_struct **p, *tmp;

	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
		if (tmp->addr == addr)
			goto found;
	}
	return NULL;

found:
	unmap_vm_area(tmp);
	*p = tmp->next;

	/*
	 * Remove the guard page.
	 */
	tmp->size -= PAGE_SIZE;
	return tmp;
}

/**
 *	remove_vm_area  -  find and remove a contiguous kernel virtual area
 *	@addr:		base address
 *
 *	Search for the kernel VM area starting at @addr, and remove it.
 *	This function returns the found VM area, but using it is NOT safe
 *	on SMP machines, except for its size or flags.
 */
struct vm_struct *remove_vm_area(const void *addr)
{
	struct vm_struct *v;
	write_lock(&vmlist_lock);
	v = __remove_vm_area(addr);
	write_unlock(&vmlist_lock);
	return v;
}

static void __vunmap(const void *addr, int deallocate_pages)
{
	struct vm_struct *area;

	if (!addr)
		return;

	if ((PAGE_SIZE-1) & (unsigned long)addr) {
		WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
		return;
	}

	area = remove_vm_area(addr);
	if (unlikely(!area)) {
		WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
				addr);
		return;
	}

	debug_check_no_locks_freed(addr, area->size);
	debug_check_no_obj_freed(addr, area->size);

	if (deallocate_pages) {
		int i;

		for (i = 0; i < area->nr_pages; i++) {
			struct page *page = area->pages[i];

			BUG_ON(!page);
			__free_page(page);
		}

		if (area->flags & VM_VPAGES)
			vfree(area->pages);
		else
			kfree(area->pages);
	}

	kfree(area);
	return;
}

/**
 *	vfree  -  release memory allocated by vmalloc()
 *	@addr:		memory base address
 *
 *	Free the virtually contiguous memory area starting at @addr, as
 *	obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
 *	NULL, no operation is performed.
 *
 *	Must not be called in interrupt context.
 */
void vfree(const void *addr)
{
	BUG_ON(in_interrupt());
	__vunmap(addr, 1);
}
EXPORT_SYMBOL(vfree);

/**
 *	vunmap  -  release virtual mapping obtained by vmap()
 *	@addr:		memory base address
 *
 *	Free the virtually contiguous memory area starting at @addr,
 *	which was created from the page array passed to vmap().
 *
 *	Must not be called in interrupt context.
 */
void vunmap(const void *addr)
{
	BUG_ON(in_interrupt());
	__vunmap(addr, 0);
}
EXPORT_SYMBOL(vunmap);

/**
 *	vmap  -  map an array of pages into virtually contiguous space
 *	@pages:		array of page pointers
 *	@count:		number of pages to map
 *	@flags:		vm_area->flags
 *	@prot:		page protection for the mapping
 *
 *	Maps @count pages from @pages into contiguous kernel virtual
 *	space.
 */
void *vmap(struct page **pages, unsigned int count,
		unsigned long flags, pgprot_t prot)
{
	struct vm_struct *area;

	if (count > num_physpages)
		return NULL;

	area = get_vm_area_caller((count << PAGE_SHIFT), flags,
					__builtin_return_address(0));
	if (!area)
		return NULL;

	if (map_vm_area(area, prot, &pages)) {
		vunmap(area->addr);
		return NULL;
	}

	return area->addr;
}
EXPORT_SYMBOL(vmap);
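
/*
 * Usage sketch (illustrative only): stitch independently allocated pages
 * into one kernel-virtual window, then tear the mapping down.  vunmap()
 * only removes the mapping; the pages themselves stay allocated:
 *
 *	struct page *pages[4];
 *	void *va;
 *	int i;
 *
 *	for (i = 0; i < 4; i++)
 *		pages[i] = alloc_page(GFP_KERNEL);
 *	va = vmap(pages, 4, VM_MAP, PAGE_KERNEL);
 *	...
 *	vunmap(va);
 *	for (i = 0; i < 4; i++)
 *		__free_page(pages[i]);
 */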

static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
				 pgprot_t prot, int node, void *caller)
{
	struct page **pages;
	unsigned int nr_pages, array_size, i;

	nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
	array_size = (nr_pages * sizeof(struct page *));

	area->nr_pages = nr_pages;
	/* Please note that the recursion is strictly bounded. */
	if (array_size > PAGE_SIZE) {
		pages = __vmalloc_node(array_size, gfp_mask | __GFP_ZERO,
				PAGE_KERNEL, node, caller);
		area->flags |= VM_VPAGES;
	} else {
		pages = kmalloc_node(array_size,
				(gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO,
				node);
	}
	area->pages = pages;
	area->caller = caller;
	if (!area->pages) {
		remove_vm_area(area->addr);
		kfree(area);
		return NULL;
	}

	for (i = 0; i < area->nr_pages; i++) {
		struct page *page;

		if (node < 0)
			page = alloc_page(gfp_mask);
		else
			page = alloc_pages_node(node, gfp_mask, 0);

		if (unlikely(!page)) {
			/* Successfully allocated i pages, free them in __vunmap() */
			area->nr_pages = i;
			goto fail;
		}
		area->pages[i] = page;
	}

	if (map_vm_area(area, prot, &pages))
		goto fail;
	return area->addr;

fail:
	vfree(area->addr);
	return NULL;
}

void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
{
	return __vmalloc_area_node(area, gfp_mask, prot, -1,
					__builtin_return_address(0));
}

/**
 *	__vmalloc_node  -  allocate virtually contiguous memory
 *	@size:		allocation size
 *	@gfp_mask:	flags for the page level allocator
 *	@prot:		protection mask for the allocated pages
 *	@node:		node to use for allocation or -1
 *	@caller:	caller's return address
 *
 *	Allocate enough pages to cover @size from the page level
 *	allocator with @gfp_mask flags.  Map them into contiguous
 *	kernel virtual space, using a pagetable protection of @prot.
 */
static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
						int node, void *caller)
{
	struct vm_struct *area;

	size = PAGE_ALIGN(size);
	if (!size || (size >> PAGE_SHIFT) > num_physpages)
		return NULL;

	area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END,
						node, gfp_mask, caller);

	if (!area)
		return NULL;

	return __vmalloc_area_node(area, gfp_mask, prot, node, caller);
}

void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
	return __vmalloc_node(size, gfp_mask, prot, -1,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(__vmalloc);

/**
 *	vmalloc  -  allocate virtually contiguous memory
 *	@size:		allocation size
 *
 *	Allocate enough pages to cover @size from the page level
 *	allocator and map them into contiguous kernel virtual space.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */
void *vmalloc(unsigned long size)
{
	return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
					-1, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc);
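
/*
 * Usage sketch (illustrative only): the classic pattern for a large,
 * virtually contiguous buffer that does not need to be physically
 * contiguous.  'struct big_table' stands in for whatever the caller needs:
 *
 *	struct big_table *tbl = vmalloc(sizeof(*tbl));
 *
 *	if (!tbl)
 *		return -ENOMEM;
 *	memset(tbl, 0, sizeof(*tbl));
 *	...
 *	vfree(tbl);
 */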

/**
 * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
 * @size: allocation size
 *
 * The resulting memory area is zeroed so it can be mapped to userspace
 * without leaking data.
 */
void *vmalloc_user(unsigned long size)
{
	struct vm_struct *area;
	void *ret;

	ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
	if (ret) {
		write_lock(&vmlist_lock);
		area = __find_vm_area(ret);
		area->flags |= VM_USERMAP;
		write_unlock(&vmlist_lock);
	}
	return ret;
}
EXPORT_SYMBOL(vmalloc_user);
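
/*
 * Note (illustrative only): buffers from vmalloc_user() are the ones
 * remap_vmalloc_range() below will accept, since only they carry the
 * VM_USERMAP flag.  A driver typically does something like:
 *
 *	drv->ring = vmalloc_user(RING_BYTES);	// RING_BYTES: driver-defined
 *
 * and later maps drv->ring into a process from its ->mmap() handler.
 */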

/**
 *	vmalloc_node  -  allocate memory on a specific node
 *	@size:		allocation size
 *	@node:		numa node
 *
 *	Allocate enough pages to cover @size from the page level
 *	allocator and map them into contiguous kernel virtual space.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */
void *vmalloc_node(unsigned long size, int node)
{
	return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
					node, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_node);

#ifndef PAGE_KERNEL_EXEC
# define PAGE_KERNEL_EXEC PAGE_KERNEL
#endif

/**
 *	vmalloc_exec  -  allocate virtually contiguous, executable memory
 *	@size:		allocation size
 *
 *	Kernel-internal function to allocate enough pages to cover @size
 *	from the page level allocator and map them into contiguous and
 *	executable kernel virtual space.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */
void *vmalloc_exec(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
}

#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL
#else
#define GFP_VMALLOC32 GFP_KERNEL
#endif

/**
 *	vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
 *	@size:		allocation size
 *
 *	Allocate enough 32bit PA addressable pages to cover @size from the
 *	page level allocator and map them into contiguous kernel virtual space.
 */
void *vmalloc_32(unsigned long size)
{
	return __vmalloc(size, GFP_VMALLOC32, PAGE_KERNEL);
}
EXPORT_SYMBOL(vmalloc_32);
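
/*
 * Usage sketch (illustrative only, assuming hardware that can only address
 * 32-bit physical pages and accesses the buffer page by page): feed such a
 * device from vmalloc_32() plus vmalloc_to_page():
 *
 *	void *frame = vmalloc_32(frame_bytes);	// frame_bytes: driver-defined
 *	struct page *pg = vmalloc_to_page(frame);
 *	// each backing page is 32-bit physically addressable
 */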

/**
 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
 * @size: allocation size
 *
 * The resulting memory area is 32bit addressable and zeroed so it can be
 * mapped to userspace without leaking data.
 */
void *vmalloc_32_user(unsigned long size)
{
	struct vm_struct *area;
	void *ret;

	ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL);
	if (ret) {
		write_lock(&vmlist_lock);
		area = __find_vm_area(ret);
		area->flags |= VM_USERMAP;
		write_unlock(&vmlist_lock);
	}
	return ret;
}
EXPORT_SYMBOL(vmalloc_32_user);

long vread(char *buf, char *addr, unsigned long count)
{
	struct vm_struct *tmp;
	char *vaddr, *buf_start = buf;
	unsigned long n;

	/* Don't allow overflow */
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;

	read_lock(&vmlist_lock);
	for (tmp = vmlist; tmp; tmp = tmp->next) {
		vaddr = (char *) tmp->addr;
		if (addr >= vaddr + tmp->size - PAGE_SIZE)
			continue;
		while (addr < vaddr) {
			if (count == 0)
				goto finished;
			*buf = '\0';
			buf++;
			addr++;
			count--;
		}
		n = vaddr + tmp->size - PAGE_SIZE - addr;
		do {
			if (count == 0)
				goto finished;
			*buf = *addr;
			buf++;
			addr++;
			count--;
		} while (--n > 0);
	}
finished:
	read_unlock(&vmlist_lock);
	return buf - buf_start;
}

long vwrite(char *buf, char *addr, unsigned long count)
{
	struct vm_struct *tmp;
	char *vaddr, *buf_start = buf;
	unsigned long n;

	/* Don't allow overflow */
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;

	read_lock(&vmlist_lock);
	for (tmp = vmlist; tmp; tmp = tmp->next) {
		vaddr = (char *) tmp->addr;
		if (addr >= vaddr + tmp->size - PAGE_SIZE)
			continue;
		while (addr < vaddr) {
			if (count == 0)
				goto finished;
			buf++;
			addr++;
			count--;
		}
		n = vaddr + tmp->size - PAGE_SIZE - addr;
		do {
			if (count == 0)
				goto finished;
			*addr = *buf;
			buf++;
			addr++;
			count--;
		} while (--n > 0);
	}
finished:
	read_unlock(&vmlist_lock);
	return buf - buf_start;
}
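
/*
 * Note (illustrative only): vread()/vwrite() back the vmalloc window of
 * /dev/kmem.  Holes between vm areas read back as '\0' and are skipped on
 * write, and the trailing guard page of each area is never touched, e.g.:
 *
 *	char kbuf[64];
 *	long copied = vread(kbuf, (char *)some_vmalloc_addr, sizeof(kbuf));
 *
 * 'some_vmalloc_addr' stands in for any address inside the vmalloc range.
 */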

/**
 *	remap_vmalloc_range  -  map vmalloc pages to userspace
 *	@vma:		vma to cover (map full range of vma)
 *	@addr:		vmalloc memory
 *	@pgoff:		number of pages into addr before first page to map
 *
 *	Returns:	0 for success, -Exxx on failure
 *
 *	This function checks that addr is a valid vmalloc'ed area, and
 *	that it is big enough to cover the vma. Will return failure if
 *	those criteria aren't met.
 *
 *	Similar to remap_pfn_range() (see mm/memory.c)
 */
int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
						unsigned long pgoff)
{
	struct vm_struct *area;
	unsigned long uaddr = vma->vm_start;
	unsigned long usize = vma->vm_end - vma->vm_start;
	int ret;

	if ((PAGE_SIZE-1) & (unsigned long)addr)
		return -EINVAL;

	read_lock(&vmlist_lock);
	area = __find_vm_area(addr);
	if (!area)
		goto out_einval_locked;

	if (!(area->flags & VM_USERMAP))
		goto out_einval_locked;

	if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
		goto out_einval_locked;
	read_unlock(&vmlist_lock);

	addr += pgoff << PAGE_SHIFT;
	do {
		struct page *page = vmalloc_to_page(addr);
		ret = vm_insert_page(vma, uaddr, page);
		if (ret)
			return ret;

		uaddr += PAGE_SIZE;
		addr += PAGE_SIZE;
		usize -= PAGE_SIZE;
	} while (usize > 0);

	/* Prevent "things" like memory migration? VM_flags need a cleanup... */
	vma->vm_flags |= VM_RESERVED;

	return ret;

out_einval_locked:
	read_unlock(&vmlist_lock);
	return -EINVAL;
}
EXPORT_SYMBOL(remap_vmalloc_range);
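
/*
 * Usage sketch (illustrative only): a driver ->mmap() handler exposing a
 * buffer that was allocated with vmalloc_user().  The names are
 * hypothetical driver state, not part of this file:
 *
 *	static int drv_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		struct drv_state *drv = file->private_data;
 *
 *		return remap_vmalloc_range(vma, drv->ring, vma->vm_pgoff);
 *	}
 */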

/*
 * Implement a stub for vmalloc_sync_all() if the architecture chose not to
 * have one.
 */
void __attribute__((weak)) vmalloc_sync_all(void)
{
}

static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
{
	/* apply_to_page_range() does all the hard work. */
	return 0;
}

/**
 *	alloc_vm_area - allocate a range of kernel address space
 *	@size:		size of the area
 *
 *	Returns:	NULL on failure, vm_struct on success
 *
 *	This function reserves a range of kernel address space, and
 *	allocates pagetables to map that range.  No actual mappings
 *	are created.  If the kernel address space is not shared
 *	between processes, it syncs the pagetable across all
 *	processes.
 */
struct vm_struct *alloc_vm_area(size_t size)
{
	struct vm_struct *area;

	area = get_vm_area_caller(size, VM_IOREMAP,
				__builtin_return_address(0));
	if (area == NULL)
		return NULL;

	/*
	 * This ensures that page tables are constructed for this region
	 * of kernel virtual address space and mapped into init_mm.
	 */
	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
				area->size, f, NULL)) {
		free_vm_area(area);
		return NULL;
	}

	/* Make sure the pagetables are constructed in process kernel
	   mappings */
	vmalloc_sync_all();

	return area;
}
EXPORT_SYMBOL_GPL(alloc_vm_area);
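
/*
 * Usage sketch (illustrative only): callers such as paravirtualized guests
 * reserve an address range up front and fill in the mappings themselves
 * later.  'nr_frames' is a caller-defined count; alloc_vm_area() allocates
 * no pages, only page tables covering the range:
 *
 *	struct vm_struct *area = alloc_vm_area(nr_frames * PAGE_SIZE);
 *
 *	if (!area)
 *		return -ENOMEM;
 *	// hypervisor-specific code maps foreign frames at area->addr here
 *	...
 *	free_vm_area(area);
 */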

void free_vm_area(struct vm_struct *area)
{
	struct vm_struct *ret;
	ret = remove_vm_area(area->addr);
	BUG_ON(ret != area);
	kfree(area);
}
EXPORT_SYMBOL_GPL(free_vm_area);


#ifdef CONFIG_PROC_FS
static void *s_start(struct seq_file *m, loff_t *pos)
{
	loff_t n = *pos;
	struct vm_struct *v;

	read_lock(&vmlist_lock);
	v = vmlist;
	while (n > 0 && v) {
		n--;
		v = v->next;
	}
	if (!n)
		return v;

	return NULL;
}

static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
	struct vm_struct *v = p;

	++*pos;
	return v->next;
}

static void s_stop(struct seq_file *m, void *p)
{
	read_unlock(&vmlist_lock);
}

static void show_numa_info(struct seq_file *m, struct vm_struct *v)
{
	if (NUMA_BUILD) {
		unsigned int nr, *counters = m->private;

		if (!counters)
			return;

		memset(counters, 0, nr_node_ids * sizeof(unsigned int));

		for (nr = 0; nr < v->nr_pages; nr++)
			counters[page_to_nid(v->pages[nr])]++;

		for_each_node_state(nr, N_HIGH_MEMORY)
			if (counters[nr])
				seq_printf(m, " N%u=%u", nr, counters[nr]);
	}
}

static int s_show(struct seq_file *m, void *p)
{
	struct vm_struct *v = p;

	seq_printf(m, "0x%p-0x%p %7ld",
		v->addr, v->addr + v->size, v->size);

	if (v->caller) {
		char buff[2 * KSYM_NAME_LEN];

		seq_putc(m, ' ');
		sprint_symbol(buff, (unsigned long)v->caller);
		seq_puts(m, buff);
	}

	if (v->nr_pages)
		seq_printf(m, " pages=%d", v->nr_pages);

	if (v->phys_addr)
		seq_printf(m, " phys=%lx", v->phys_addr);

	if (v->flags & VM_IOREMAP)
		seq_printf(m, " ioremap");

	if (v->flags & VM_ALLOC)
		seq_printf(m, " vmalloc");

	if (v->flags & VM_MAP)
		seq_printf(m, " vmap");

	if (v->flags & VM_USERMAP)
		seq_printf(m, " user");

	if (v->flags & VM_VPAGES)
		seq_printf(m, " vpages");

	show_numa_info(m, v);
	seq_putc(m, '\n');
	return 0;
}

const struct seq_operations vmalloc_op = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show,
};
#endif
