/*
 * Memory Migration functionality - linux/mm/migrate.c
 *
 * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
 *
 * Page migration was first developed in the context of the memory hotplug
 * project. The main authors of the migration code are:
 *
 * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
 * Hirokazu Takahashi <taka@valinux.co.jp>
 * Dave Hansen <haveblue@us.ibm.com>
 * Christoph Lameter <clameter@sgi.com>
 */

#include <linux/migrate.h>
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>	/* for try_to_release_page(),
					buffer_heads_over_limit */
#include <linux/mm_inline.h>
#include <linux/pagevec.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/swapops.h>

#include "internal.h"
/* The maximum number of pages to take off the LRU for migration */
#define MIGRATE_CHUNK_SIZE 256

#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))

/*
 * Isolate one page from the LRU lists. If successful, put it onto
 * the indicated list with an elevated page count.
 *
 * Result:
 *  -EBUSY: page not on LRU list
 *  0: page removed from LRU list and added to the specified list.
 */
int isolate_lru_page(struct page *page, struct list_head *pagelist)
{
	int ret = -EBUSY;

	if (PageLRU(page)) {
		struct zone *zone = page_zone(page);

		spin_lock_irq(&zone->lru_lock);
		if (PageLRU(page)) {
			ret = 0;
			get_page(page);
			ClearPageLRU(page);
			if (PageActive(page))
				del_page_from_active_list(zone, page);
			else
				del_page_from_inactive_list(zone, page);
			list_add_tail(&page->lru, pagelist);
		}
		spin_unlock_irq(&zone->lru_lock);
	}
	return ret;
}
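
/*
 * Editorial usage sketch, not part of the original file: a caller
 * typically accumulates isolated pages on a private list and later
 * hands that list to migrate_pages(), along the lines of:
 *
 *	LIST_HEAD(pagelist);
 *
 *	if (isolate_lru_page(page, &pagelist) == 0)
 *		nr_isolated++;
 *
 * The nr_isolated counter is hypothetical. On success the page is off
 * the LRU with an extra reference and sits on pagelist; the reference
 * is dropped again by move_to_lru()/putback_lru_pages() below.
 */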

/*
 * migrate_prep() needs to be called before we start compiling a list of
 * pages to be migrated using isolate_lru_page(), and before a series of
 * calls to migrate_pages().
 */
int migrate_prep(void)
{
	/* Must have swap device for migration */
	if (nr_swap_pages <= 0)
		return -ENODEV;

	/*
	 * Clear the LRU lists so pages can be isolated.
	 * Note that pages may be moved off the LRU after we have
	 * drained them. Those pages will fail to migrate like other
	 * pages that may be busy.
	 */
	lru_add_drain_all();

	return 0;
}

static inline void move_to_lru(struct page *page)
{
	list_del(&page->lru);
	if (PageActive(page)) {
		/*
		 * lru_cache_add_active checks that
		 * the PG_active bit is off.
		 */
		ClearPageActive(page);
		lru_cache_add_active(page);
	} else {
		lru_cache_add(page);
	}
	put_page(page);
}

/*
 * Add isolated pages on the list back to the LRU.
 *
 * returns the number of pages put back.
 */
int putback_lru_pages(struct list_head *l)
{
	struct page *page;
	struct page *page2;
	int count = 0;

	list_for_each_entry_safe(page, page2, l, lru) {
		move_to_lru(page);
		count++;
	}
	return count;
}

/*
 * Non-migratable page.
 */
int fail_migrate_page(struct page *newpage, struct page *page)
{
	return -EIO;
}
EXPORT_SYMBOL(fail_migrate_page);
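
/*
 * Editorial note, not original text: an address space whose pages must
 * never move could refuse migration by wiring this up in its
 * address_space_operations, for instance:
 *
 *	.migratepage	= fail_migrate_page,
 *
 * migrate_pages() below then sees -EIO and treats the page as a
 * permanent failure.
 */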

/*
 * Swap out a single page.
 * The page is locked upon entry, unlocked on exit.
 */
static int swap_page(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	if (page_mapped(page) && mapping)
		if (try_to_unmap(page, 1) != SWAP_SUCCESS)
			goto unlock_retry;

	if (PageDirty(page)) {
		/* Page is dirty, try to write it out here */
		switch(pageout(page, mapping)) {
		case PAGE_KEEP:
		case PAGE_ACTIVATE:
			goto unlock_retry;

		case PAGE_SUCCESS:
			goto retry;

		case PAGE_CLEAN:
			; /* try to free the page below */
		}
	}

	if (PagePrivate(page)) {
		if (!try_to_release_page(page, GFP_KERNEL) ||
		    (!mapping && page_count(page) == 1))
			goto unlock_retry;
	}

	if (remove_mapping(mapping, page)) {
		/* Success */
		unlock_page(page);
		return 0;
	}

unlock_retry:
	unlock_page(page);

retry:
	return -EAGAIN;
}
EXPORT_SYMBOL(swap_page);

/*
 * Remove references for a page and establish the new page with the correct
 * basic settings to be able to stop accesses to the page.
 */
int migrate_page_remove_references(struct page *newpage,
				struct page *page, int nr_refs)
{
	struct address_space *mapping = page_mapping(page);
	struct page **radix_pointer;

	/*
	 * Avoid doing any of the following work if the page count
	 * indicates that the page is in use or truncate has removed
	 * the page.
	 */
	if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
		return -EAGAIN;

	/*
	 * Establish swap ptes for anonymous pages or destroy pte
	 * maps for files.
	 *
	 * In order to reestablish file backed mappings the fault handlers
	 * will take the radix tree_lock which may then be used to stop
	 * processes from accessing this page until the new page is ready.
	 *
	 * A process accessing via a swap pte (an anonymous page) will take a
	 * page_lock on the old page which will block the process until the
	 * migration attempt is complete. At that time the PageSwapCache bit
	 * will be examined. If the page was migrated then the PageSwapCache
	 * bit will be clear and the operation to retrieve the page will be
	 * retried which will find the new page in the radix tree. Then a new
	 * direct mapping may be generated based on the radix tree contents.
	 *
	 * If the page was not migrated then the PageSwapCache bit
	 * is still set and the operation may continue.
	 */
	if (try_to_unmap(page, 1) == SWAP_FAIL)
		/* A vma has VM_LOCKED set -> permanent failure */
		return -EPERM;

	/*
	 * Give up if we were unable to remove all mappings.
	 */
	if (page_mapcount(page))
		return -EAGAIN;

	write_lock_irq(&mapping->tree_lock);

	radix_pointer = (struct page **)radix_tree_lookup_slot(
						&mapping->page_tree,
						page_index(page));

	if (!page_mapping(page) || page_count(page) != nr_refs ||
			*radix_pointer != page) {
		write_unlock_irq(&mapping->tree_lock);
		return 1;
	}

	/*
	 * Now we know that no one else is looking at the page.
	 *
	 * Certain minimal information about a page must be available
	 * in order for other subsystems to properly handle the page if they
	 * find it through the radix tree update before we are finished
	 * copying the page.
	 */
	get_page(newpage);
	newpage->index = page->index;
	newpage->mapping = page->mapping;
	if (PageSwapCache(page)) {
		SetPageSwapCache(newpage);
		set_page_private(newpage, page_private(page));
	}

	*radix_pointer = newpage;
	__put_page(page);
	write_unlock_irq(&mapping->tree_lock);

	return 0;
}
EXPORT_SYMBOL(migrate_page_remove_references);
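
/*
 * Editorial note on nr_refs, inferred from the callers in this file
 * (not original text): nr_refs is the number of references an
 * otherwise unused page is expected to hold. migrate_page() passes 2,
 * the radix tree entry plus the reference taken by isolate_lru_page().
 * buffer_migrate_page() passes 3, because the buffer heads attached
 * via page_private hold one more reference. Any other count means
 * someone else is using the page and the caller must retry.
 */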

/*
 * Copy the page to its new location
 */
void migrate_page_copy(struct page *newpage, struct page *page)
{
	copy_highpage(newpage, page);

	if (PageError(page))
		SetPageError(newpage);
	if (PageReferenced(page))
		SetPageReferenced(newpage);
	if (PageUptodate(page))
		SetPageUptodate(newpage);
	if (PageActive(page))
		SetPageActive(newpage);
	if (PageChecked(page))
		SetPageChecked(newpage);
	if (PageMappedToDisk(page))
		SetPageMappedToDisk(newpage);

	if (PageDirty(page)) {
		clear_page_dirty_for_io(page);
		set_page_dirty(newpage);
	}

	ClearPageSwapCache(page);
	ClearPageActive(page);
	ClearPagePrivate(page);
	set_page_private(page, 0);
	page->mapping = NULL;

	/*
	 * If any waiters have accumulated on the new page then
	 * wake them up.
	 */
	if (PageWriteback(newpage))
		end_page_writeback(newpage);
}
EXPORT_SYMBOL(migrate_page_copy);
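
/*
 * Editorial note, not original text: flags such as PG_locked,
 * PG_writeback and PG_lru are deliberately not transferred above.
 * Both pages are locked by the caller, writeback on the old page must
 * already have completed (see the BUG_ON in migrate_page() below), and
 * the new page only goes back on the LRU via move_to_lru() once
 * migration is done.
 */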

/*
 * Common logic to directly migrate a single page suitable for
 * pages that do not use PagePrivate.
 *
 * Pages are locked upon entry and exit.
 */
int migrate_page(struct page *newpage, struct page *page)
{
	int rc;

	BUG_ON(PageWriteback(page));	/* Writeback must be complete */

	rc = migrate_page_remove_references(newpage, page, 2);

	if (rc)
		return rc;

	migrate_page_copy(newpage, page);

	/*
	 * Remove auxiliary swap entries and replace
	 * them with real ptes.
	 *
	 * Note that a real pte entry will allow processes that are not
	 * waiting on the page lock to use the new page via the page tables
	 * before the new page is unlocked.
	 */
	remove_from_swap(newpage);
	return 0;
}
EXPORT_SYMBOL(migrate_page);
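
/*
 * Editorial example, not original text: a filesystem whose pages never
 * carry private data (no buffer heads) could use migrate_page()
 * directly as its migration method, for instance:
 *
 *	static struct address_space_operations example_aops = {
 *		...
 *		.migratepage	= migrate_page,
 *	};
 *
 * "example_aops" is hypothetical; see buffer_migrate_page() below for
 * the buffer-head variant.
 */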

/*
 * migrate_pages
 *
 * Two lists are passed to this function. The first list
 * contains the pages isolated from the LRU to be migrated.
 * The second list contains new pages that the isolated pages
 * can be moved to. If the second list is NULL then all
 * pages are swapped out instead.
 *
 * The function returns after 10 attempts or when no pages
 * are movable any more, either because the "to" list has
 * become empty or because no retryable pages remain.
 *
 * Return: the number of pages that were not migrated.
 */
int migrate_pages(struct list_head *from, struct list_head *to,
		  struct list_head *moved, struct list_head *failed)
{
	int retry;
	int nr_failed = 0;
	int pass = 0;
	struct page *page;
	struct page *page2;
	int swapwrite = current->flags & PF_SWAPWRITE;
	int rc;

	if (!swapwrite)
		current->flags |= PF_SWAPWRITE;

redo:
	retry = 0;

	list_for_each_entry_safe(page, page2, from, lru) {
		struct page *newpage = NULL;
		struct address_space *mapping;

		cond_resched();

		rc = 0;
		if (page_count(page) == 1)
			/* page was freed from under us. So we are done. */
			goto next;

		if (to && list_empty(to))
			break;

		/*
		 * Skip locked pages during the first few passes to give the
		 * functions holding the lock time to release the page. Later
		 * we use lock_page() to have a higher chance of acquiring
		 * the lock.
		 */
		rc = -EAGAIN;
		if (pass > 2)
			lock_page(page);
		else
			if (TestSetPageLocked(page))
				goto next;

		/*
		 * Only wait on writeback if we have already done a pass where
		 * we may have triggered writeouts for lots of pages.
		 */
		if (pass > 0) {
			wait_on_page_writeback(page);
		} else {
			if (PageWriteback(page))
				goto unlock_page;
		}

		/*
		 * Anonymous pages must have swap cache references otherwise
		 * the information contained in the page maps cannot be
		 * preserved.
		 */
		if (PageAnon(page) && !PageSwapCache(page)) {
			if (!add_to_swap(page, GFP_KERNEL)) {
				rc = -ENOMEM;
				goto unlock_page;
			}
		}

		if (!to) {
			rc = swap_page(page);
			goto next;
		}

		newpage = lru_to_page(to);
		lock_page(newpage);

		/*
		 * Pages are properly locked and writeback is complete.
		 * Try to migrate the page.
		 */
		mapping = page_mapping(page);
		if (!mapping)
			goto unlock_both;

		if (mapping->a_ops->migratepage) {
			/*
			 * Most pages have a mapping and most filesystems
			 * should provide a migration function. Anonymous
			 * pages are part of swap space which also has its
			 * own migration function. This is the most common
			 * path for page migration.
			 */
			rc = mapping->a_ops->migratepage(newpage, page);
			goto unlock_both;
		}

		/*
		 * Default handling if a filesystem does not provide
		 * a migration function. We can only migrate clean
		 * pages so try to write out any dirty pages first.
		 */
		if (PageDirty(page)) {
			switch (pageout(page, mapping)) {
			case PAGE_KEEP:
			case PAGE_ACTIVATE:
				goto unlock_both;

			case PAGE_SUCCESS:
				unlock_page(newpage);
				goto next;

			case PAGE_CLEAN:
				; /* try to migrate the page below */
			}
		}

		/*
		 * Buffers are managed in a filesystem specific way.
		 * We must have no buffers or drop them.
		 */
		if (!page_has_buffers(page) ||
		    try_to_release_page(page, GFP_KERNEL)) {
			rc = migrate_page(newpage, page);
			goto unlock_both;
		}

		/*
		 * On early passes simply retry: a lock held on some buffers
		 * may go away in the meantime. On later passes swap the page
		 * out instead.
		 */
		if (pass > 4) {
			/*
			 * Persistently unable to drop buffers... As a
			 * measure of last resort we fall back to
			 * swap_page().
			 */
			unlock_page(newpage);
			newpage = NULL;
			rc = swap_page(page);
			goto next;
		}

unlock_both:
		unlock_page(newpage);

unlock_page:
		unlock_page(page);

next:
		if (rc == -EAGAIN) {
			retry++;
		} else if (rc) {
			/* Permanent failure */
			list_move(&page->lru, failed);
			nr_failed++;
		} else {
			if (newpage) {
				/* Successful migration. Return page to LRU */
				move_to_lru(newpage);
			}
			list_move(&page->lru, moved);
		}
	}
	if (retry && pass++ < 10)
		goto redo;

	if (!swapwrite)
		current->flags &= ~PF_SWAPWRITE;

	return nr_failed + retry;
}
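
/*
 * Editorial usage sketch, not original text, assuming migrate_prep()
 * was called before the pages on "pagelist" were isolated and that
 * "newlist" holds freshly allocated target pages:
 *
 *	LIST_HEAD(moved);
 *	LIST_HEAD(failed);
 *	int nr_left;
 *
 *	nr_left = migrate_pages(&pagelist, &newlist, &moved, &failed);
 *	putback_lru_pages(&moved);
 *
 * Pages that failed permanently end up on "failed";
 * migrate_pages_to() below follows this pattern.
 */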

/*
 * Migration function for pages with buffers. This function can only be used
 * if the underlying filesystem guarantees that no other references to "page"
 * exist.
 */
int buffer_migrate_page(struct page *newpage, struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct buffer_head *bh, *head;
	int rc;

	if (!mapping)
		return -EAGAIN;

	if (!page_has_buffers(page))
		return migrate_page(newpage, page);

	head = page_buffers(page);

	rc = migrate_page_remove_references(newpage, page, 3);

	if (rc)
		return rc;

	bh = head;
	do {
		get_bh(bh);
		lock_buffer(bh);
		bh = bh->b_this_page;

	} while (bh != head);

	ClearPagePrivate(page);
	set_page_private(newpage, page_private(page));
	set_page_private(page, 0);
	put_page(page);
	get_page(newpage);

	bh = head;
	do {
		set_bh_page(bh, newpage, bh_offset(bh));
		bh = bh->b_this_page;

	} while (bh != head);

	SetPagePrivate(newpage);

	migrate_page_copy(newpage, page);

	bh = head;
	do {
		unlock_buffer(bh);
		put_bh(bh);
		bh = bh->b_this_page;

	} while (bh != head);

	return 0;
}
EXPORT_SYMBOL(buffer_migrate_page);
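
/*
 * Editorial example, not original text: a buffer-head based
 * address space might hook this up as, for instance:
 *
 *	static struct address_space_operations example_aops = {
 *		...
 *		.migratepage	= buffer_migrate_page,
 *	};
 *
 * "example_aops" is hypothetical; whether a given filesystem can use
 * this safely depends on the no-other-references guarantee above.
 */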

/*
 * Migrate the list 'pagelist' of pages to a certain destination.
 *
 * Specify the destination with either a non-NULL vma or a node
 * number dest >= 0.
 * Return the number of pages not migrated or an error code.
 */
int migrate_pages_to(struct list_head *pagelist,
			struct vm_area_struct *vma, int dest)
{
	LIST_HEAD(newlist);
	LIST_HEAD(moved);
	LIST_HEAD(failed);
	int err = 0;
	unsigned long offset = 0;
	int nr_pages;
	struct page *page;
	struct list_head *p;

redo:
	nr_pages = 0;
	list_for_each(p, pagelist) {
		if (vma) {
			/*
			 * The address passed to alloc_page_vma is used to
			 * generate the proper interleave behavior. We fake
			 * the address here by an increasing offset in order
			 * to get the proper distribution of pages.
			 *
			 * No decision has been made as to which page
			 * a certain old page is moved to so we cannot
			 * specify the correct address.
			 */
			page = alloc_page_vma(GFP_HIGHUSER, vma,
					offset + vma->vm_start);
			offset += PAGE_SIZE;
		}
		else
			page = alloc_pages_node(dest, GFP_HIGHUSER, 0);

		if (!page) {
			err = -ENOMEM;
			goto out;
		}
		list_add_tail(&page->lru, &newlist);
		nr_pages++;
		if (nr_pages > MIGRATE_CHUNK_SIZE)
			break;
	}
	err = migrate_pages(pagelist, &newlist, &moved, &failed);

	putback_lru_pages(&moved);	/* Call release_pages() instead? */

	if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist))
		goto redo;
out:
	/* Return leftover allocated pages */
	while (!list_empty(&newlist)) {
		page = list_entry(newlist.next, struct page, lru);
		list_del(&page->lru);
		__free_page(page);
	}
	list_splice(&failed, pagelist);
	if (err < 0)
		return err;

	/* Calculate number of leftover pages */
	nr_pages = 0;
	list_for_each(p, pagelist)
		nr_pages++;
	return nr_pages;
}
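
/*
 * Editorial usage sketch, not original text: moving isolated pages to
 * node "nid" (the variable names are hypothetical):
 *
 *	int nr_left = migrate_pages_to(&pagelist, NULL, nid);
 *
 * A negative nr_left means page allocation failed; a positive nr_left
 * means that many pages could not be moved. Either way, the pages
 * that were not migrated remain on pagelist.
 */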