| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* internal.h: mm/ internal definitions | 
|  | 2 | * | 
|  | 3 | * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. | 
|  | 4 | * Written by David Howells (dhowells@redhat.com) | 
|  | 5 | * | 
|  | 6 | * This program is free software; you can redistribute it and/or | 
|  | 7 | * modify it under the terms of the GNU General Public License | 
|  | 8 | * as published by the Free Software Foundation; either version | 
|  | 9 | * 2 of the License, or (at your option) any later version. | 
|  | 10 | */ | 
| Nick Piggin | 0f8053a | 2006-03-22 00:08:33 -0800 | [diff] [blame] | 11 | #ifndef __MM_INTERNAL_H | 
|  | 12 | #define __MM_INTERNAL_H | 
|  | 13 |  | 
|  | 14 | #include <linux/mm.h> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 15 |  | 
/* Implemented outside this header; only the prototype is provided here. */
extern void free_pgtables(struct mmu_gather *tlb,
			  struct vm_area_struct *start_vma,
			  unsigned long floor,
			  unsigned long ceiling);
|  | 18 |  | 
| Nick Piggin | 7835e98 | 2006-03-22 00:08:40 -0800 | [diff] [blame] | 19 | static inline void set_page_count(struct page *page, int v) | 
| Nick Piggin | 77a8a78 | 2006-01-06 00:10:57 -0800 | [diff] [blame] | 20 | { | 
| Nick Piggin | 7835e98 | 2006-03-22 00:08:40 -0800 | [diff] [blame] | 21 | atomic_set(&page->_count, v); | 
|  | 22 | } | 
|  | 23 |  | 
|  | 24 | /* | 
|  | 25 | * Turn a non-refcounted page (->_count == 0) into refcounted with | 
|  | 26 | * a count of one. | 
|  | 27 | */ | 
|  | 28 | static inline void set_page_refcounted(struct page *page) | 
|  | 29 | { | 
| Qi Yong | ae1276b | 2008-02-04 22:29:27 -0800 | [diff] [blame] | 30 | VM_BUG_ON(PageTail(page)); | 
| Nick Piggin | 725d704 | 2006-09-25 23:30:55 -0700 | [diff] [blame] | 31 | VM_BUG_ON(atomic_read(&page->_count)); | 
| Nick Piggin | 77a8a78 | 2006-01-06 00:10:57 -0800 | [diff] [blame] | 32 | set_page_count(page, 1); | 
| Nick Piggin | 77a8a78 | 2006-01-06 00:10:57 -0800 | [diff] [blame] | 33 | } | 
|  | 34 |  | 
| Nick Piggin | 0f8053a | 2006-03-22 00:08:33 -0800 | [diff] [blame] | 35 | static inline void __put_page(struct page *page) | 
|  | 36 | { | 
|  | 37 | atomic_dec(&page->_count); | 
|  | 38 | } | 
|  | 39 |  | 
| Andrea Arcangeli | 68fe9d9 | 2011-11-02 13:36:59 -0700 | [diff] [blame] | 40 | static inline void __get_page_tail_foll(struct page *page, | 
|  | 41 | bool get_page_head) | 
|  | 42 | { | 
|  | 43 | /* | 
|  | 44 | * If we're getting a tail page, the elevated page->_count is | 
|  | 45 | * required only in the head page and we will elevate the head | 
|  | 46 | * page->_count and tail page->_mapcount. | 
|  | 47 | * | 
|  | 48 | * We elevate page_tail->_mapcount for tail pages to force | 
|  | 49 | * page_tail->_count to be zero at all times to avoid getting | 
|  | 50 | * false positives from get_page_unless_zero() with | 
|  | 51 | * speculative page access (like in | 
|  | 52 | * page_cache_get_speculative()) on tail pages. | 
|  | 53 | */ | 
|  | 54 | VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0); | 
|  | 55 | VM_BUG_ON(atomic_read(&page->_count) != 0); | 
|  | 56 | VM_BUG_ON(page_mapcount(page) < 0); | 
|  | 57 | if (get_page_head) | 
|  | 58 | atomic_inc(&page->first_page->_count); | 
|  | 59 | atomic_inc(&page->_mapcount); | 
|  | 60 | } | 
|  | 61 |  | 
|  | 62 | /* | 
|  | 63 | * This is meant to be called as the FOLL_GET operation of | 
|  | 64 | * follow_page() and it must be called while holding the proper PT | 
|  | 65 | * lock while the pte (or pmd_trans_huge) is still mapping the page. | 
|  | 66 | */ | 
|  | 67 | static inline void get_page_foll(struct page *page) | 
|  | 68 | { | 
|  | 69 | if (unlikely(PageTail(page))) | 
|  | 70 | /* | 
|  | 71 | * This is safe only because | 
|  | 72 | * __split_huge_page_refcount() can't run under | 
|  | 73 | * get_page_foll() because we hold the proper PT lock. | 
|  | 74 | */ | 
|  | 75 | __get_page_tail_foll(page, true); | 
|  | 76 | else { | 
|  | 77 | /* | 
|  | 78 | * Getting a normal page or the head of a compound page | 
|  | 79 | * requires to already have an elevated page->_count. | 
|  | 80 | */ | 
|  | 81 | VM_BUG_ON(atomic_read(&page->_count) <= 0); | 
|  | 82 | atomic_inc(&page->_count); | 
|  | 83 | } | 
|  | 84 | } | 
|  | 85 |  | 
/* Defined outside this header; only declared here. */
extern unsigned long highest_memmap_pfn;
|  | 87 |  | 
/*
 * Implemented in mm/vmscan.c.
 */
int isolate_lru_page(struct page *page);
void putback_lru_page(struct page *page);
| Nick Piggin | 62695a8 | 2008-10-18 20:26:09 -0700 | [diff] [blame] | 93 |  | 
/*
 * Implemented in mm/page_alloc.c.
 */
void __free_pages_bootmem(struct page *page, unsigned int order);
void prep_compound_page(struct page *page, unsigned long order);
#ifdef CONFIG_MEMORY_FAILURE
/* Only declared when memory-failure handling is configured in. */
bool is_free_buddy_page(struct page *page);
#endif
| Wu Fengguang | 20a0307 | 2009-06-16 15:32:22 -0700 | [diff] [blame] | 102 |  | 
| Nick Piggin | 0f8053a | 2006-03-22 00:08:33 -0800 | [diff] [blame] | 103 |  | 
/*
 * Helper for reading a page's order in the buddy system.
 *
 * zone->lock is already held whenever this is used, so no atomic
 * page->flags operations are needed.  The caller is responsible for
 * having checked PageBuddy(); that check is not repeated here.
 */
static inline unsigned long page_order(struct page *page)
{
	unsigned long order = page_private(page);

	return order;
}
| Alexander van Heukelum | b5a0e01 | 2008-02-23 15:24:06 -0800 | [diff] [blame] | 114 |  | 
/* Implemented in mm/util.c. */
extern void __vma_link_list(struct mm_struct *mm,
			    struct vm_area_struct *vma,
			    struct vm_area_struct *prev,
			    struct rb_node *rb_parent);
|  | 118 |  | 
| Hugh Dickins | af8e335 | 2009-12-14 17:58:59 -0800 | [diff] [blame] | 119 | #ifdef CONFIG_MMU | 
/* Range-based mlock/munlock over [start, end) of @vma; defined elsewhere. */
long mlock_vma_pages_range(struct vm_area_struct *vma,
			   unsigned long start,
			   unsigned long end);
void munlock_vma_pages_range(struct vm_area_struct *vma,
			     unsigned long start,
			     unsigned long end);
|  | 124 | static inline void munlock_vma_pages_all(struct vm_area_struct *vma) | 
|  | 125 | { | 
|  | 126 | munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end); | 
|  | 127 | } | 
|  | 128 |  | 
| Nick Piggin | b291f00 | 2008-10-18 20:26:44 -0700 | [diff] [blame] | 129 | /* | 
|  | 130 | * Called only in fault path via page_evictable() for a new page | 
|  | 131 | * to determine if it's being mapped into a LOCKED vma. | 
|  | 132 | * If so, mark page as mlocked. | 
|  | 133 | */ | 
|  | 134 | static inline int is_mlocked_vma(struct vm_area_struct *vma, struct page *page) | 
|  | 135 | { | 
|  | 136 | VM_BUG_ON(PageLRU(page)); | 
|  | 137 |  | 
|  | 138 | if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) | 
|  | 139 | return 0; | 
|  | 140 |  | 
| Nick Piggin | 5344b7e | 2008-10-18 20:26:51 -0700 | [diff] [blame] | 141 | if (!TestSetPageMlocked(page)) { | 
|  | 142 | inc_zone_page_state(page, NR_MLOCK); | 
|  | 143 | count_vm_event(UNEVICTABLE_PGMLOCKED); | 
|  | 144 | } | 
| Nick Piggin | b291f00 | 2008-10-18 20:26:44 -0700 | [diff] [blame] | 145 | return 1; | 
|  | 146 | } | 
|  | 147 |  | 
/*
 * Both must be called with the page locked and with the vma's mmap_sem
 * held, for either read or write.
 */
void mlock_vma_page(struct page *page);
void munlock_vma_page(struct page *page);
| Nick Piggin | b291f00 | 2008-10-18 20:26:44 -0700 | [diff] [blame] | 153 |  | 
/*
 * Drop the page's PageMlocked() state.  Useful when a page is to be
 * removed from the pagecache unconditionally, e.g. on truncation or
 * freeing.
 *
 * This may be called for any page, whether mlocked or not.  If the
 * page is still mapped by mlocked vmas, the only effect is a return to
 * lazy LRU behaviour; semantics are not broken.
 */
extern void __clear_page_mlock(struct page *page);
static inline void clear_page_mlock(struct page *page)
{
	/* Common case: the flag was not set, nothing further to do. */
	if (likely(!TestClearPageMlocked(page)))
		return;

	__clear_page_mlock(page);
}
|  | 169 |  | 
|  | 170 | /* | 
|  | 171 | * mlock_migrate_page - called only from migrate_page_copy() to | 
| Nick Piggin | 5344b7e | 2008-10-18 20:26:51 -0700 | [diff] [blame] | 172 | * migrate the Mlocked page flag; update statistics. | 
| Nick Piggin | b291f00 | 2008-10-18 20:26:44 -0700 | [diff] [blame] | 173 | */ | 
|  | 174 | static inline void mlock_migrate_page(struct page *newpage, struct page *page) | 
|  | 175 | { | 
| Nick Piggin | 5344b7e | 2008-10-18 20:26:51 -0700 | [diff] [blame] | 176 | if (TestClearPageMlocked(page)) { | 
|  | 177 | unsigned long flags; | 
|  | 178 |  | 
|  | 179 | local_irq_save(flags); | 
|  | 180 | __dec_zone_page_state(page, NR_MLOCK); | 
| Nick Piggin | b291f00 | 2008-10-18 20:26:44 -0700 | [diff] [blame] | 181 | SetPageMlocked(newpage); | 
| Nick Piggin | 5344b7e | 2008-10-18 20:26:51 -0700 | [diff] [blame] | 182 | __inc_zone_page_state(newpage, NR_MLOCK); | 
|  | 183 | local_irq_restore(flags); | 
|  | 184 | } | 
| Nick Piggin | b291f00 | 2008-10-18 20:26:44 -0700 | [diff] [blame] | 185 | } | 
|  | 186 |  | 
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* Only declared when transparent hugepage support is configured in. */
unsigned long vma_address(struct page *page, struct vm_area_struct *vma);
#endif
| Hugh Dickins | af8e335 | 2009-12-14 17:58:59 -0800 | [diff] [blame] | 191 | #else /* !CONFIG_MMU */ | 
/* Without CONFIG_MMU the mlock helpers collapse into no-ops. */
static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p)
{
	/* Never reports a locked vma in this configuration. */
	return 0;
}

static inline void clear_page_mlock(struct page *page)
{
}

static inline void mlock_vma_page(struct page *page)
{
}

static inline void mlock_migrate_page(struct page *new, struct page *old)
{
}
|  | 199 |  | 
| Hugh Dickins | af8e335 | 2009-12-14 17:58:59 -0800 | [diff] [blame] | 200 | #endif /* !CONFIG_MMU */ | 
| Lee Schermerhorn | 894bc31 | 2008-10-18 20:26:39 -0700 | [diff] [blame] | 201 |  | 
| Alexander van Heukelum | b5a0e01 | 2008-02-23 15:24:06 -0800 | [diff] [blame] | 202 | /* | 
| Andy Whitcroft | 69d177c | 2008-11-06 12:53:26 -0800 | [diff] [blame] | 203 | * Return the mem_map entry representing the 'offset' subpage within | 
|  | 204 | * the maximally aligned gigantic page 'base'.  Handle any discontiguity | 
|  | 205 | * in the mem_map at MAX_ORDER_NR_PAGES boundaries. | 
|  | 206 | */ | 
|  | 207 | static inline struct page *mem_map_offset(struct page *base, int offset) | 
|  | 208 | { | 
|  | 209 | if (unlikely(offset >= MAX_ORDER_NR_PAGES)) | 
|  | 210 | return pfn_to_page(page_to_pfn(base) + offset); | 
|  | 211 | return base + offset; | 
|  | 212 | } | 
|  | 213 |  | 
|  | 214 | /* | 
| Lucas De Marchi | 25985ed | 2011-03-30 22:57:33 -0300 | [diff] [blame] | 215 | * Iterator over all subpages within the maximally aligned gigantic | 
| Andy Whitcroft | 69d177c | 2008-11-06 12:53:26 -0800 | [diff] [blame] | 216 | * page 'base'.  Handle any discontiguity in the mem_map. | 
|  | 217 | */ | 
|  | 218 | static inline struct page *mem_map_next(struct page *iter, | 
|  | 219 | struct page *base, int offset) | 
|  | 220 | { | 
|  | 221 | if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) { | 
|  | 222 | unsigned long pfn = page_to_pfn(base) + offset; | 
|  | 223 | if (!pfn_valid(pfn)) | 
|  | 224 | return NULL; | 
|  | 225 | return pfn_to_page(pfn); | 
|  | 226 | } | 
|  | 227 | return iter + 1; | 
|  | 228 | } | 
|  | 229 |  | 
/*
 * FLATMEM and DISCONTIGMEM configurations rely on alloc_bootmem_node,
 * so in those cases every function from paging_init onwards should be
 * marked __init.  SPARSEMEM allows memory hotplug and does not use
 * alloc_bootmem_node, so it gets __meminit instead.
 */
#ifndef CONFIG_SPARSEMEM
#define __paginginit __init
#else
#define __paginginit __meminit
#endif
|  | 241 |  | 
/*
 * Memory initialisation debug and verification levels, in ascending
 * numeric order.
 */
enum mminit_level {
	MMINIT_WARNING = 0,
	MMINIT_VERIFY  = 1,
	MMINIT_TRACE   = 2
};
|  | 248 |  | 
|  | 249 | #ifdef CONFIG_DEBUG_MEMORY_INIT | 
|  | 250 |  | 
extern int mminit_loglevel;

/*
 * mminit_dprintk - conditional memory-init debug message.
 *
 * @level:  an enum mminit_level value; the message is printed only when
 *          it is strictly below mminit_loglevel.
 * @prefix: string literal placed after "mminit::".
 * @fmt:    printk-style format string literal, followed by its arguments.
 *
 * Levels up to and including MMINIT_WARNING are logged at KERN_WARNING,
 * everything else at KERN_DEBUG.
 *
 * The log level and the text are passed to a single printk() call.
 * Emitting the level first and the text afterwards as a KERN_CONT
 * continuation would let output from another context land between the
 * two, detaching the message from its level.
 *
 * Note that @level is evaluated more than once.
 */
#define mminit_dprintk(level, prefix, fmt, arg...) \
do { \
	if ((level) < mminit_loglevel) { \
		if ((level) <= MMINIT_WARNING) \
			printk(KERN_WARNING "mminit::" prefix " " fmt, ##arg); \
		else \
			printk(KERN_DEBUG "mminit::" prefix " " fmt, ##arg); \
	} \
} while (0)
|  | 260 |  | 
/* Verification hooks; real implementations exist only in debug builds. */
void mminit_verify_pageflags_layout(void);
void mminit_verify_page_links(struct page *page,
			      enum zone_type zone,
			      unsigned long nid,
			      unsigned long pfn);
void mminit_verify_zonelist(void);
| Mel Gorman | 708614e | 2008-07-23 21:26:51 -0700 | [diff] [blame] | 265 |  | 
| Mel Gorman | 6b74ab9 | 2008-07-23 21:26:49 -0700 | [diff] [blame] | 266 | #else | 
|  | 267 |  | 
|  | 268 | static inline void mminit_dprintk(enum mminit_level level, | 
|  | 269 | const char *prefix, const char *fmt, ...) | 
|  | 270 | { | 
|  | 271 | } | 
|  | 272 |  | 
| Mel Gorman | 708614e | 2008-07-23 21:26:51 -0700 | [diff] [blame] | 273 | static inline void mminit_verify_pageflags_layout(void) | 
|  | 274 | { | 
|  | 275 | } | 
|  | 276 |  | 
|  | 277 | static inline void mminit_verify_page_links(struct page *page, | 
|  | 278 | enum zone_type zone, unsigned long nid, unsigned long pfn) | 
|  | 279 | { | 
|  | 280 | } | 
| Mel Gorman | 68ad8df | 2008-07-23 21:26:52 -0700 | [diff] [blame] | 281 |  | 
|  | 282 | static inline void mminit_verify_zonelist(void) | 
|  | 283 | { | 
|  | 284 | } | 
| Mel Gorman | 6b74ab9 | 2008-07-23 21:26:49 -0700 | [diff] [blame] | 285 | #endif /* CONFIG_DEBUG_MEMORY_INIT */ | 
| Mel Gorman | 2dbb51c | 2008-07-23 21:26:52 -0700 | [diff] [blame] | 286 |  | 
/*
 * mminit_validate_memmodel_limits does not depend on
 * CONFIG_DEBUG_MEMORY_INIT.  Only SPARSEMEM has a real implementation
 * (defined elsewhere); every other memory model gets a no-op that
 * leaves both pfn values untouched.
 */
#ifndef CONFIG_SPARSEMEM
static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
						   unsigned long *end_pfn)
{
}
#else
extern void mminit_validate_memmodel_limits(unsigned long *start_pfn,
					    unsigned long *end_pfn);
#endif /* !CONFIG_SPARSEMEM */
|  | 297 |  | 
/*
 * ZONE_RECLAIM_* status codes.
 * NOTE(review): the meaning of each code is not described in this
 * header; see the users of these constants.
 */
#define ZONE_RECLAIM_NOSCAN	(-2)
#define ZONE_RECLAIM_FULL	(-1)
#define ZONE_RECLAIM_SOME	(0)
#define ZONE_RECLAIM_SUCCESS	(1)
| Nick Piggin | 0f8053a | 2006-03-22 00:08:33 -0800 | [diff] [blame] | 302 | #endif | 
| Wu Fengguang | 7c116f2 | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 303 |  | 
| Wu Fengguang | 31d3d34 | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 304 | extern int hwpoison_filter(struct page *p); | 
|  | 305 |  | 
| Wu Fengguang | 7c116f2 | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 306 | extern u32 hwpoison_filter_dev_major; | 
|  | 307 | extern u32 hwpoison_filter_dev_minor; | 
| Wu Fengguang | 478c5ff | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 308 | extern u64 hwpoison_filter_flags_mask; | 
|  | 309 | extern u64 hwpoison_filter_flags_value; | 
| Andi Kleen | 4fd466e | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 310 | extern u64 hwpoison_filter_memcg; | 
| Haicheng Li | 1bfe5fe | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 311 | extern u32 hwpoison_filter_enable; |