| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* internal.h: mm/ internal definitions | 
 | 2 |  * | 
 | 3 |  * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. | 
 | 4 |  * Written by David Howells (dhowells@redhat.com) | 
 | 5 |  * | 
 | 6 |  * This program is free software; you can redistribute it and/or | 
 | 7 |  * modify it under the terms of the GNU General Public License | 
 | 8 |  * as published by the Free Software Foundation; either version | 
 | 9 |  * 2 of the License, or (at your option) any later version. | 
 | 10 |  */ | 
| Nick Piggin | 0f8053a | 2006-03-22 00:08:33 -0800 | [diff] [blame] | 11 | #ifndef __MM_INTERNAL_H | 
 | 12 | #define __MM_INTERNAL_H | 
 | 13 |  | 
 | 14 | #include <linux/mm.h> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 15 |  | 
/*
 * free_pgtables() is declared here for use within mm/; its definition is
 * not part of this header.
 */
void free_pgtables(struct mmu_gather *tlb,
		   struct vm_area_struct *start_vma,
		   unsigned long floor,
		   unsigned long ceiling);
 | 18 |  | 
| Nick Piggin | 7835e98 | 2006-03-22 00:08:40 -0800 | [diff] [blame] | 19 | static inline void set_page_count(struct page *page, int v) | 
| Nick Piggin | 77a8a78 | 2006-01-06 00:10:57 -0800 | [diff] [blame] | 20 | { | 
| Nick Piggin | 7835e98 | 2006-03-22 00:08:40 -0800 | [diff] [blame] | 21 | 	atomic_set(&page->_count, v); | 
 | 22 | } | 
 | 23 |  | 
 | 24 | /* | 
 | 25 |  * Turn a non-refcounted page (->_count == 0) into refcounted with | 
 | 26 |  * a count of one. | 
 | 27 |  */ | 
 | 28 | static inline void set_page_refcounted(struct page *page) | 
 | 29 | { | 
| Qi Yong | ae1276b | 2008-02-04 22:29:27 -0800 | [diff] [blame] | 30 | 	VM_BUG_ON(PageTail(page)); | 
| Nick Piggin | 725d704 | 2006-09-25 23:30:55 -0700 | [diff] [blame] | 31 | 	VM_BUG_ON(atomic_read(&page->_count)); | 
| Nick Piggin | 77a8a78 | 2006-01-06 00:10:57 -0800 | [diff] [blame] | 32 | 	set_page_count(page, 1); | 
| Nick Piggin | 77a8a78 | 2006-01-06 00:10:57 -0800 | [diff] [blame] | 33 | } | 
 | 34 |  | 
| Nick Piggin | 0f8053a | 2006-03-22 00:08:33 -0800 | [diff] [blame] | 35 | static inline void __put_page(struct page *page) | 
 | 36 | { | 
 | 37 | 	atomic_dec(&page->_count); | 
 | 38 | } | 
 | 39 |  | 
| Andrea Arcangeli | 70b50f9 | 2011-11-02 13:36:59 -0700 | [diff] [blame] | 40 | static inline void __get_page_tail_foll(struct page *page, | 
 | 41 | 					bool get_page_head) | 
 | 42 | { | 
 | 43 | 	/* | 
 | 44 | 	 * If we're getting a tail page, the elevated page->_count is | 
 | 45 | 	 * required only in the head page and we will elevate the head | 
 | 46 | 	 * page->_count and tail page->_mapcount. | 
 | 47 | 	 * | 
 | 48 | 	 * We elevate page_tail->_mapcount for tail pages to force | 
 | 49 | 	 * page_tail->_count to be zero at all times to avoid getting | 
 | 50 | 	 * false positives from get_page_unless_zero() with | 
 | 51 | 	 * speculative page access (like in | 
 | 52 | 	 * page_cache_get_speculative()) on tail pages. | 
 | 53 | 	 */ | 
 | 54 | 	VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0); | 
 | 55 | 	VM_BUG_ON(atomic_read(&page->_count) != 0); | 
 | 56 | 	VM_BUG_ON(page_mapcount(page) < 0); | 
 | 57 | 	if (get_page_head) | 
 | 58 | 		atomic_inc(&page->first_page->_count); | 
 | 59 | 	atomic_inc(&page->_mapcount); | 
 | 60 | } | 
 | 61 |  | 
 | 62 | /* | 
 | 63 |  * This is meant to be called as the FOLL_GET operation of | 
 | 64 |  * follow_page() and it must be called while holding the proper PT | 
 | 65 |  * lock while the pte (or pmd_trans_huge) is still mapping the page. | 
 | 66 |  */ | 
 | 67 | static inline void get_page_foll(struct page *page) | 
 | 68 | { | 
 | 69 | 	if (unlikely(PageTail(page))) | 
 | 70 | 		/* | 
 | 71 | 		 * This is safe only because | 
 | 72 | 		 * __split_huge_page_refcount() can't run under | 
 | 73 | 		 * get_page_foll() because we hold the proper PT lock. | 
 | 74 | 		 */ | 
 | 75 | 		__get_page_tail_foll(page, true); | 
 | 76 | 	else { | 
 | 77 | 		/* | 
 | 78 | 		 * Getting a normal page or the head of a compound page | 
 | 79 | 		 * requires to already have an elevated page->_count. | 
 | 80 | 		 */ | 
 | 81 | 		VM_BUG_ON(atomic_read(&page->_count) <= 0); | 
 | 82 | 		atomic_inc(&page->_count); | 
 | 83 | 	} | 
 | 84 | } | 
 | 85 |  | 
/* Declaration only; the variable is defined outside this header. */
extern unsigned long highest_memmap_pfn;
 | 87 |  | 
/*
 * Provided by mm/vmscan.c:
 */
int isolate_lru_page(struct page *page);
void putback_lru_page(struct page *page);
| Nick Piggin | 62695a8 | 2008-10-18 20:26:09 -0700 | [diff] [blame] | 93 |  | 
/*
 * Provided by mm/page_alloc.c:
 */
void __free_pages_bootmem(struct page *page, unsigned int order);
void prep_compound_page(struct page *page, unsigned long order);
#if defined(CONFIG_MEMORY_FAILURE)
/* Declared only when CONFIG_MEMORY_FAILURE is set. */
bool is_free_buddy_page(struct page *page);
#endif
| Wu Fengguang | 20a0307 | 2009-06-16 15:32:22 -0700 | [diff] [blame] | 102 |  | 
| Nick Piggin | 0f8053a | 2006-03-22 00:08:33 -0800 | [diff] [blame] | 103 |  | 
/*
 * Read back a page's order in the buddy system, which is kept in
 * page_private().
 *
 * zone->lock is already held whenever this is used, so no atomic
 * page->flags operations are needed here.
 */
static inline unsigned long page_order(struct page *page)
{
	unsigned long order;

	/* The caller is responsible for having checked PageBuddy(). */
	order = page_private(page);
	return order;
}
| Alexander van Heukelum | b5a0e01 | 2008-02-23 15:24:06 -0800 | [diff] [blame] | 114 |  | 
/* Provided by mm/util.c */
void __vma_link_list(struct mm_struct *mm,
		     struct vm_area_struct *vma,
		     struct vm_area_struct *prev,
		     struct rb_node *rb_parent);
 | 118 |  | 
| Hugh Dickins | af8e335 | 2009-12-14 17:58:59 -0800 | [diff] [blame] | 119 | #ifdef CONFIG_MMU | 
 /* mlock/munlock over the range [start, end) of a vma; defined elsewhere. */
long mlock_vma_pages_range(struct vm_area_struct *vma,
			   unsigned long start, unsigned long end);
void munlock_vma_pages_range(struct vm_area_struct *vma,
			     unsigned long start, unsigned long end);
 | 124 | static inline void munlock_vma_pages_all(struct vm_area_struct *vma) | 
 | 125 | { | 
 | 126 | 	munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end); | 
 | 127 | } | 
 | 128 |  | 
| Nick Piggin | b291f00 | 2008-10-18 20:26:44 -0700 | [diff] [blame] | 129 | /* | 
 | 130 |  * Called only in fault path via page_evictable() for a new page | 
 | 131 |  * to determine if it's being mapped into a LOCKED vma. | 
 | 132 |  * If so, mark page as mlocked. | 
 | 133 |  */ | 
 | 134 | static inline int is_mlocked_vma(struct vm_area_struct *vma, struct page *page) | 
 | 135 | { | 
 | 136 | 	VM_BUG_ON(PageLRU(page)); | 
 | 137 |  | 
 | 138 | 	if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) | 
 | 139 | 		return 0; | 
 | 140 |  | 
| Nick Piggin | 5344b7e | 2008-10-18 20:26:51 -0700 | [diff] [blame] | 141 | 	if (!TestSetPageMlocked(page)) { | 
 | 142 | 		inc_zone_page_state(page, NR_MLOCK); | 
 | 143 | 		count_vm_event(UNEVICTABLE_PGMLOCKED); | 
 | 144 | 	} | 
| Nick Piggin | b291f00 | 2008-10-18 20:26:44 -0700 | [diff] [blame] | 145 | 	return 1; | 
 | 146 | } | 
 | 147 |  | 
 /*
 * Both must be called with the vma's mmap_sem held (for read or for
 * write) and with the page locked.
 */
void mlock_vma_page(struct page *page);
void munlock_vma_page(struct page *page);
| Nick Piggin | b291f00 | 2008-10-18 20:26:44 -0700 | [diff] [blame] | 153 |  | 
 /*
 * Drop PageMlocked() from a page.  Useful when a page is being removed
 * from the pagecache unconditionally, e.g. on truncation or freeing.
 *
 * Any page may be passed in, mlocked or not.  If the page is still
 * mapped by mlocked vmas, the only effect is a return to lazy LRU
 * behaviour; semantics are not broken.
 */
void __clear_page_mlock(struct page *page);

static inline void clear_page_mlock(struct page *page)
{
	/* Common case: the flag was not set, so there is nothing to undo. */
	if (likely(!TestClearPageMlocked(page)))
		return;

	__clear_page_mlock(page);
}
 | 169 |  | 
 | 170 | /* | 
 | 171 |  * mlock_migrate_page - called only from migrate_page_copy() to | 
| Nick Piggin | 5344b7e | 2008-10-18 20:26:51 -0700 | [diff] [blame] | 172 |  * migrate the Mlocked page flag; update statistics. | 
| Nick Piggin | b291f00 | 2008-10-18 20:26:44 -0700 | [diff] [blame] | 173 |  */ | 
 | 174 | static inline void mlock_migrate_page(struct page *newpage, struct page *page) | 
 | 175 | { | 
| Nick Piggin | 5344b7e | 2008-10-18 20:26:51 -0700 | [diff] [blame] | 176 | 	if (TestClearPageMlocked(page)) { | 
 | 177 | 		unsigned long flags; | 
 | 178 |  | 
 | 179 | 		local_irq_save(flags); | 
 | 180 | 		__dec_zone_page_state(page, NR_MLOCK); | 
| Nick Piggin | b291f00 | 2008-10-18 20:26:44 -0700 | [diff] [blame] | 181 | 		SetPageMlocked(newpage); | 
| Nick Piggin | 5344b7e | 2008-10-18 20:26:51 -0700 | [diff] [blame] | 182 | 		__inc_zone_page_state(newpage, NR_MLOCK); | 
 | 183 | 		local_irq_restore(flags); | 
 | 184 | 	} | 
| Nick Piggin | b291f00 | 2008-10-18 20:26:44 -0700 | [diff] [blame] | 185 | } | 
 | 186 |  | 
#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
/* Exposed to the rest of mm/ only when transparent hugepages are built. */
unsigned long vma_address(struct page *page, struct vm_area_struct *vma);
#endif
| Hugh Dickins | af8e335 | 2009-12-14 17:58:59 -0800 | [diff] [blame] | 191 | #else /* !CONFIG_MMU */ | 
/*
 * !CONFIG_MMU replacements for the mlock helpers: every one of them is
 * a no-op, and is_mlocked_vma() always answers 0.
 */
static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p)
{
	(void)v;
	(void)p;
	return 0;
}

static inline void clear_page_mlock(struct page *page)
{
}

static inline void mlock_vma_page(struct page *page)
{
}

static inline void mlock_migrate_page(struct page *new, struct page *old)
{
}

 | 199 |  | 
| Hugh Dickins | af8e335 | 2009-12-14 17:58:59 -0800 | [diff] [blame] | 200 | #endif /* !CONFIG_MMU */ | 
| Lee Schermerhorn | 894bc31 | 2008-10-18 20:26:39 -0700 | [diff] [blame] | 201 |  | 
| Alexander van Heukelum | b5a0e01 | 2008-02-23 15:24:06 -0800 | [diff] [blame] | 202 | /* | 
| Andy Whitcroft | 69d177c | 2008-11-06 12:53:26 -0800 | [diff] [blame] | 203 |  * Return the mem_map entry representing the 'offset' subpage within | 
 | 204 |  * the maximally aligned gigantic page 'base'.  Handle any discontiguity | 
 | 205 |  * in the mem_map at MAX_ORDER_NR_PAGES boundaries. | 
 | 206 |  */ | 
 | 207 | static inline struct page *mem_map_offset(struct page *base, int offset) | 
 | 208 | { | 
 | 209 | 	if (unlikely(offset >= MAX_ORDER_NR_PAGES)) | 
 | 210 | 		return pfn_to_page(page_to_pfn(base) + offset); | 
 | 211 | 	return base + offset; | 
 | 212 | } | 
 | 213 |  | 
 | 214 | /* | 
| Lucas De Marchi | 25985ed | 2011-03-30 22:57:33 -0300 | [diff] [blame] | 215 |  * Iterator over all subpages within the maximally aligned gigantic | 
| Andy Whitcroft | 69d177c | 2008-11-06 12:53:26 -0800 | [diff] [blame] | 216 |  * page 'base'.  Handle any discontiguity in the mem_map. | 
 | 217 |  */ | 
 | 218 | static inline struct page *mem_map_next(struct page *iter, | 
 | 219 | 						struct page *base, int offset) | 
 | 220 | { | 
 | 221 | 	if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) { | 
 | 222 | 		unsigned long pfn = page_to_pfn(base) + offset; | 
 | 223 | 		if (!pfn_valid(pfn)) | 
 | 224 | 			return NULL; | 
 | 225 | 		return pfn_to_page(pfn); | 
 | 226 | 	} | 
 | 227 | 	return iter + 1; | 
 | 228 | } | 
 | 229 |  | 
 /*
 * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node, so in
 * those cases every function from paging_init onwards should be marked
 * __init.  SPARSEMEM allows for memory hotplug and does not use
 * alloc_bootmem_node, so there the functions are marked __meminit.
 */
#ifndef CONFIG_SPARSEMEM
#define __paginginit __init
#else
#define __paginginit __meminit
#endif
 | 241 |  | 
/*
 * Memory initialisation debug and verification: message levels passed
 * to mminit_dprintk(), in increasing numeric order.
 */
enum mminit_level {
	MMINIT_WARNING = 0,
	MMINIT_VERIFY  = 1,
	MMINIT_TRACE   = 2
};
 | 248 |  | 
 | 249 | #ifdef CONFIG_DEBUG_MEMORY_INIT | 
 | 250 |  | 
 extern int mminit_loglevel;

/*
 * mminit_dprintk - emit a memory-init debug message
 * @level:  one of enum mminit_level; the message is printed only when
 *          level < mminit_loglevel
 * @prefix: string literal naming the reporting subsystem
 * @fmt:    printk-style format string literal, followed by its arguments
 *
 * Messages at MMINIT_WARNING (or below) are logged with KERN_WARNING,
 * everything else with KERN_DEBUG.  The output is "mminit::<prefix> "
 * followed by the formatted text.
 *
 * The log level, the prefix and the format are pasted into a single
 * string literal so that each message is produced by exactly one
 * printk() call.  Printing the level on its own and the text as a
 * KERN_CONT continuation would let output from other contexts land
 * between the two pieces.  @prefix and @fmt must therefore be string
 * literals.  @level is parenthesised so that an expression argument
 * cannot change the meaning of the comparisons.
 */
#define mminit_dprintk(level, prefix, fmt, arg...) \
do { \
	if ((level) < mminit_loglevel) { \
		if ((level) <= MMINIT_WARNING) \
			printk(KERN_WARNING "mminit::" prefix " " fmt, ##arg); \
		else \
			printk(KERN_DEBUG "mminit::" prefix " " fmt, ##arg); \
	} \
} while (0)
 | 260 |  | 
/* Verification hooks, declared only when CONFIG_DEBUG_MEMORY_INIT is set. */
void mminit_verify_pageflags_layout(void);
void mminit_verify_page_links(struct page *page, enum zone_type zone,
			      unsigned long nid, unsigned long pfn);
void mminit_verify_zonelist(void);
| Mel Gorman | 708614e | 2008-07-23 21:26:51 -0700 | [diff] [blame] | 265 |  | 
| Mel Gorman | 6b74ab9 | 2008-07-23 21:26:49 -0700 | [diff] [blame] | 266 | #else | 
 | 267 |  | 
 | 268 | static inline void mminit_dprintk(enum mminit_level level, | 
 | 269 | 				const char *prefix, const char *fmt, ...) | 
 | 270 | { | 
 | 271 | } | 
 | 272 |  | 
/* !CONFIG_DEBUG_MEMORY_INIT: the page-flags layout check is a no-op. */
static inline void mminit_verify_pageflags_layout(void) { }
 | 276 |  | 
 | 277 | static inline void mminit_verify_page_links(struct page *page, | 
 | 278 | 		enum zone_type zone, unsigned long nid, unsigned long pfn) | 
 | 279 | { | 
 | 280 | } | 
| Mel Gorman | 68ad8df | 2008-07-23 21:26:52 -0700 | [diff] [blame] | 281 |  | 
 /* !CONFIG_DEBUG_MEMORY_INIT: the zonelist check is a no-op. */
static inline void mminit_verify_zonelist(void) { }
| Mel Gorman | 6b74ab9 | 2008-07-23 21:26:49 -0700 | [diff] [blame] | 285 | #endif /* CONFIG_DEBUG_MEMORY_INIT */ | 
| Mel Gorman | 2dbb51c | 2008-07-23 21:26:52 -0700 | [diff] [blame] | 286 |  | 
 /*
 * mminit_validate_memmodel_limits() does not depend on
 * CONFIG_DEBUG_MEMORY_INIT: a real implementation is declared for
 * SPARSEMEM, and every other memory model gets a stub that leaves
 * *start_pfn and *end_pfn untouched.
 */
#ifdef CONFIG_SPARSEMEM
void mminit_validate_memmodel_limits(unsigned long *start_pfn,
				     unsigned long *end_pfn);
#else
static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
						   unsigned long *end_pfn)
{
	/* intentionally empty */
}
#endif /* CONFIG_SPARSEMEM */
 | 297 |  | 
/*
 * Zone reclaim result codes.
 * NOTE(review): presumably the return values of zone_reclaim() -- the
 * users are not visible in this header, confirm against the callers.
 */
#define ZONE_RECLAIM_NOSCAN	(-2)
#define ZONE_RECLAIM_FULL	(-1)
#define ZONE_RECLAIM_SOME	(0)
#define ZONE_RECLAIM_SUCCESS	(1)
| Nick Piggin | 0f8053a | 2006-03-22 00:08:33 -0800 | [diff] [blame] | 302 | #endif | 
| Wu Fengguang | 7c116f2 | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 303 |  | 
| Wu Fengguang | 31d3d34 | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 304 | extern int hwpoison_filter(struct page *p); | 
 | 305 |  | 
| Wu Fengguang | 7c116f2 | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 306 | extern u32 hwpoison_filter_dev_major; | 
 | 307 | extern u32 hwpoison_filter_dev_minor; | 
| Wu Fengguang | 478c5ff | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 308 | extern u64 hwpoison_filter_flags_mask; | 
 | 309 | extern u64 hwpoison_filter_flags_value; | 
| Andi Kleen | 4fd466e | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 310 | extern u64 hwpoison_filter_memcg; | 
| Haicheng Li | 1bfe5fe | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 311 | extern u32 hwpoison_filter_enable; |