Blame - fs/xfs/linux-2.6/xfs_sync.c - android_kernel_oneplus_msm8996

blob: 7adc62dd14bbcefd27fe6526cd37ac0766b36aaa [file] [log] [blame]

David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	1	/*
				2	* Copyright (c) 2000-2005 Silicon Graphics, Inc.
				3	* All Rights Reserved.
				4	*
				5	* This program is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU General Public License as
				7	* published by the Free Software Foundation.
				8	*
				9	* This program is distributed in the hope that it would be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
				13	*
				14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write the Free Software Foundation,
				16	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
				17	*/
				18	#include "xfs.h"
				19	#include "xfs_fs.h"
				20	#include "xfs_types.h"
				21	#include "xfs_bit.h"
				22	#include "xfs_log.h"
				23	#include "xfs_inum.h"
				24	#include "xfs_trans.h"
				25	#include "xfs_sb.h"
				26	#include "xfs_ag.h"
				27	#include "xfs_dir2.h"
				28	#include "xfs_dmapi.h"
				29	#include "xfs_mount.h"
				30	#include "xfs_bmap_btree.h"
				31	#include "xfs_alloc_btree.h"
				32	#include "xfs_ialloc_btree.h"
				33	#include "xfs_btree.h"
				34	#include "xfs_dir2_sf.h"
				35	#include "xfs_attr_sf.h"
				36	#include "xfs_inode.h"
				37	#include "xfs_dinode.h"
				38	#include "xfs_error.h"
				39	#include "xfs_mru_cache.h"
				40	#include "xfs_filestream.h"
				41	#include "xfs_vnodeops.h"
				42	#include "xfs_utils.h"
				43	#include "xfs_buf_item.h"
				44	#include "xfs_inode_item.h"
				45	#include "xfs_rw.h"
Christoph Hellwig	7d09525	2009-06-08 15:33:32 +0200	[diff] [blame]	46	#include "xfs_quota.h"
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	47
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	48	#include <linux/kthread.h>
				49	#include <linux/freezer.h>
				50
Dave Chinner	5a34d5c	2009-06-08 15:35:03 +0200	[diff] [blame^]	51
				52	STATIC int
				53	xfs_sync_inode_data(
				54	struct xfs_inode *ip,
				55	int flags)
				56	{
				57	struct inode *inode = VFS_I(ip);
				58	struct address_space *mapping = inode->i_mapping;
				59	int error = 0;
				60
				61	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
				62	goto out_wait;
				63
				64	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
				65	if (flags & SYNC_TRYLOCK)
				66	goto out_wait;
				67	xfs_ilock(ip, XFS_IOLOCK_SHARED);
				68	}
				69
				70	error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
				71	0 : XFS_B_ASYNC, FI_NONE);
				72	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
				73
				74	out_wait:
				75	if (flags & SYNC_IOWAIT)
				76	xfs_ioend_wait(ip);
				77	return error;
				78	}
				79
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	80	/*
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	81	* Sync all the inodes in the given AG according to the
				82	* direction given by the flags.
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	83	*/
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	84	STATIC int
				85	xfs_sync_inodes_ag(
				86	xfs_mount_t *mp,
				87	int ag,
David Chinner	2030b5a	2008-10-30 17:15:12 +1100	[diff] [blame]	88	int flags)
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	89	{
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	90	xfs_perag_t *pag = &mp->m_perag[ag];
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	91	int nr_found;
David Chinner	8c38ab0	2008-10-30 17:38:00 +1100	[diff] [blame]	92	uint32_t first_index = 0;
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	93	int error = 0;
				94	int last_error = 0;
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	95
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	96	do {
David Chinner	bc60a99	2008-10-30 17:15:03 +1100	[diff] [blame]	97	struct inode *inode;
David Chinner	bc60a99	2008-10-30 17:15:03 +1100	[diff] [blame]	98	xfs_inode_t *ip = NULL;
David Chinner	455486b	2008-10-30 18:03:14 +1100	[diff] [blame]	99	int lock_flags = XFS_ILOCK_SHARED;
David Chinner	bc60a99	2008-10-30 17:15:03 +1100	[diff] [blame]	100
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	101	/*
				102	* use a gang lookup to find the next inode in the tree
				103	* as the tree is sparse and a gang lookup walks to find
				104	* the number of objects requested.
				105	*/
				106	read_lock(&pag->pag_ici_lock);
				107	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
				108	(void**)&ip, first_index, 1);
				109
				110	if (!nr_found) {
				111	read_unlock(&pag->pag_ici_lock);
				112	break;
				113	}
				114
David Chinner	8c38ab0	2008-10-30 17:38:00 +1100	[diff] [blame]	115	/*
				116	* Update the index for the next lookup. Catch overflows
				117	* into the next AG range which can occur if we have inodes
				118	* in the last block of the AG and we are currently
				119	* pointing to the last inode.
				120	*/
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	121	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
David Chinner	8c38ab0	2008-10-30 17:38:00 +1100	[diff] [blame]	122	if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
				123	read_unlock(&pag->pag_ici_lock);
				124	break;
				125	}
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	126
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	127	/* nothing to sync during shutdown */
David Chinner	cb56a4b	2008-10-30 17:16:00 +1100	[diff] [blame]	128	if (XFS_FORCED_SHUTDOWN(mp)) {
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	129	read_unlock(&pag->pag_ici_lock);
				130	return 0;
				131	}
				132
				133	/*
David Chinner	455486b	2008-10-30 18:03:14 +1100	[diff] [blame]	134	* If we can't get a reference on the inode, it must be
				135	* in reclaim. Leave it for the reclaim code to flush.
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	136	*/
David Chinner	455486b	2008-10-30 18:03:14 +1100	[diff] [blame]	137	inode = VFS_I(ip);
				138	if (!igrab(inode)) {
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	139	read_unlock(&pag->pag_ici_lock);
David Chinner	455486b	2008-10-30 18:03:14 +1100	[diff] [blame]	140	continue;
				141	}
				142	read_unlock(&pag->pag_ici_lock);
				143
Dave Chinner	6307091	2008-11-10 17:13:23 +1100	[diff] [blame]	144	/* avoid new or bad inodes */
				145	if (is_bad_inode(inode) \|\|
				146	xfs_iflags_test(ip, XFS_INEW)) {
David Chinner	455486b	2008-10-30 18:03:14 +1100	[diff] [blame]	147	IRELE(ip);
				148	continue;
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	149	}
David Chinner	bc60a99	2008-10-30 17:15:03 +1100	[diff] [blame]	150
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	151	/*
				152	* If we have to flush data or wait for I/O completion
David Chinner	455486b	2008-10-30 18:03:14 +1100	[diff] [blame]	153	* we need to hold the iolock.
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	154	*/
Dave Chinner	5a34d5c	2009-06-08 15:35:03 +0200	[diff] [blame^]	155	if (flags & SYNC_DELWRI)
				156	error = xfs_sync_inode_data(ip, flags);
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	157
Dave Chinner	5a34d5c	2009-06-08 15:35:03 +0200	[diff] [blame^]	158	xfs_ilock(ip, XFS_ILOCK_SHARED);
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	159	if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
				160	if (flags & SYNC_WAIT) {
				161	xfs_iflock(ip);
				162	if (!xfs_inode_clean(ip))
				163	error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
				164	else
				165	xfs_ifunlock(ip);
				166	} else if (xfs_iflock_nowait(ip)) {
				167	if (!xfs_inode_clean(ip))
				168	error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
				169	else
				170	xfs_ifunlock(ip);
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	171	}
				172	}
David Chinner	455486b	2008-10-30 18:03:14 +1100	[diff] [blame]	173	xfs_iput(ip, lock_flags);
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	174
				175	if (error)
				176	last_error = error;
				177	/*
				178	* bail out if the filesystem is corrupted.
				179	*/
				180	if (error == EFSCORRUPTED)
				181	return XFS_ERROR(error);
				182
				183	} while (nr_found);
				184
				185	return last_error;
				186	}
				187
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	188	int
				189	xfs_sync_inodes(
				190	xfs_mount_t *mp,
David Chinner	2030b5a	2008-10-30 17:15:12 +1100	[diff] [blame]	191	int flags)
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	192	{
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	193	int error;
				194	int last_error;
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	195	int i;
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	196	int lflags = XFS_LOG_FORCE;
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	197
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	198	if (mp->m_flags & XFS_MOUNT_RDONLY)
				199	return 0;
				200	error = 0;
				201	last_error = 0;
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	202
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	203	if (flags & SYNC_WAIT)
				204	lflags \|= XFS_LOG_SYNC;
				205
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	206	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
				207	if (!mp->m_perag[i].pag_ici_init)
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	208	continue;
David Chinner	2030b5a	2008-10-30 17:15:12 +1100	[diff] [blame]	209	error = xfs_sync_inodes_ag(mp, i, flags);
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	210	if (error)
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	211	last_error = error;
David Chinner	683a897	2008-10-30 17:07:29 +1100	[diff] [blame]	212	if (error == EFSCORRUPTED)
				213	break;
				214	}
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	215	if (flags & SYNC_DELWRI)
				216	xfs_log_force(mp, 0, lflags);
				217
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	218	return XFS_ERROR(last_error);
				219	}
				220
Christoph Hellwig	2af75df	2008-10-30 17:14:53 +1100	[diff] [blame]	221	STATIC int
				222	xfs_commit_dummy_trans(
				223	struct xfs_mount *mp,
				224	uint log_flags)
				225	{
				226	struct xfs_inode *ip = mp->m_rootip;
				227	struct xfs_trans *tp;
				228	int error;
				229
				230	/*
				231	* Put a dummy transaction in the log to tell recovery
				232	* that all others are OK.
				233	*/
				234	tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
				235	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
				236	if (error) {
				237	xfs_trans_cancel(tp, 0);
				238	return error;
				239	}
				240
				241	xfs_ilock(ip, XFS_ILOCK_EXCL);
				242
				243	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
				244	xfs_trans_ihold(tp, ip);
				245	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				246	/* XXX(hch): ignoring the error here.. */
				247	error = xfs_trans_commit(tp, 0);
				248
				249	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				250
				251	xfs_log_force(mp, 0, log_flags);
				252	return 0;
				253	}
				254
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	255	int
Christoph Hellwig	2af75df	2008-10-30 17:14:53 +1100	[diff] [blame]	256	xfs_sync_fsdata(
				257	struct xfs_mount *mp,
				258	int flags)
				259	{
				260	struct xfs_buf *bp;
				261	struct xfs_buf_log_item *bip;
				262	int error = 0;
				263
				264	/*
				265	* If this is xfssyncd() then only sync the superblock if we can
				266	* lock it without sleeping and it is not pinned.
				267	*/
				268	if (flags & SYNC_BDFLUSH) {
				269	ASSERT(!(flags & SYNC_WAIT));
				270
				271	bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
				272	if (!bp)
				273	goto out;
				274
				275	bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *);
				276	if (!bip \|\| !xfs_buf_item_dirty(bip) \|\| XFS_BUF_ISPINNED(bp))
				277	goto out_brelse;
				278	} else {
				279	bp = xfs_getsb(mp, 0);
				280
				281	/*
				282	* If the buffer is pinned then push on the log so we won't
				283	* get stuck waiting in the write for someone, maybe
				284	* ourselves, to flush the log.
				285	*
				286	* Even though we just pushed the log above, we did not have
				287	* the superblock buffer locked at that point so it can
				288	* become pinned in between there and here.
				289	*/
				290	if (XFS_BUF_ISPINNED(bp))
				291	xfs_log_force(mp, 0, XFS_LOG_FORCE);
				292	}
				293
				294
				295	if (flags & SYNC_WAIT)
				296	XFS_BUF_UNASYNC(bp);
				297	else
				298	XFS_BUF_ASYNC(bp);
				299
				300	return xfs_bwrite(mp, bp);
				301
				302	out_brelse:
				303	xfs_buf_relse(bp);
				304	out:
				305	return error;
				306	}
				307
David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame]	308	/*
David Chinner	a4e4c4f	2008-10-30 17:16:11 +1100	[diff] [blame]	309	* When remounting a filesystem read-only or freezing the filesystem, we have
				310	* two phases to execute. This first phase is syncing the data before we
				311	* quiesce the filesystem, and the second is flushing all the inodes out after
				312	* we've waited for all the transactions created by the first phase to
				313	* complete. The second phase ensures that the inodes are written to their
				314	* location on disk rather than just existing in transactions in the log. This
				315	* means after a quiesce there is no log replay required to write the inodes to
				316	* disk (this is the main difference between a sync and a quiesce).
				317	*/
				318	/*
				319	* First stage of freeze - no writers will make progress now we are here,
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	320	* so we flush delwri and delalloc buffers here, then wait for all I/O to
				321	* complete. Data is frozen at that point. Metadata is not frozen,
David Chinner	a4e4c4f	2008-10-30 17:16:11 +1100	[diff] [blame]	322	* transactions can still occur here so don't bother flushing the buftarg
				323	* because it'll just get dirty again.
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	324	*/
				325	int
				326	xfs_quiesce_data(
				327	struct xfs_mount *mp)
				328	{
				329	int error;
				330
				331	/* push non-blocking */
				332	xfs_sync_inodes(mp, SYNC_DELWRI\|SYNC_BDFLUSH);
Christoph Hellwig	7d09525	2009-06-08 15:33:32 +0200	[diff] [blame]	333	xfs_qm_sync(mp, SYNC_BDFLUSH);
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	334	xfs_filestream_flush(mp);
				335
				336	/* push and block */
				337	xfs_sync_inodes(mp, SYNC_DELWRI\|SYNC_WAIT\|SYNC_IOWAIT);
Christoph Hellwig	7d09525	2009-06-08 15:33:32 +0200	[diff] [blame]	338	xfs_qm_sync(mp, SYNC_WAIT);
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	339
David Chinner	a4e4c4f	2008-10-30 17:16:11 +1100	[diff] [blame]	340	/* write superblock and hoover up shutdown errors */
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	341	error = xfs_sync_fsdata(mp, 0);
				342
David Chinner	a4e4c4f	2008-10-30 17:16:11 +1100	[diff] [blame]	343	/* flush data-only devices */
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	344	if (mp->m_rtdev_targp)
				345	XFS_bflush(mp->m_rtdev_targp);
				346
				347	return error;
				348	}
				349
David Chinner	76bf105	2008-10-30 17:16:21 +1100	[diff] [blame]	350	STATIC void
				351	xfs_quiesce_fs(
				352	struct xfs_mount *mp)
				353	{
				354	int count = 0, pincount;
				355
				356	xfs_flush_buftarg(mp->m_ddev_targp, 0);
David Chinner	1dc3318	2008-10-30 17:37:15 +1100	[diff] [blame]	357	xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
David Chinner	76bf105	2008-10-30 17:16:21 +1100	[diff] [blame]	358
				359	/*
				360	* This loop must run at least twice. The first instance of the loop
				361	* will flush most meta data but that will generate more meta data
				362	* (typically directory updates). Which then must be flushed and
				363	* logged before we can write the unmount record.
				364	*/
				365	do {
				366	xfs_sync_inodes(mp, SYNC_ATTR\|SYNC_WAIT);
				367	pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
				368	if (!pincount) {
				369	delay(50);
				370	count++;
				371	}
				372	} while (count < 2);
				373	}
				374
				375	/*
				376	* Second stage of a quiesce. The data is already synced, now we have to take
				377	* care of the metadata. New transactions are already blocked, so we need to
				378	* wait for any remaining transactions to drain out before proceding.
				379	*/
				380	void
				381	xfs_quiesce_attr(
				382	struct xfs_mount *mp)
				383	{
				384	int error = 0;
				385
				386	/* wait for all modifications to complete */
				387	while (atomic_read(&mp->m_active_trans) > 0)
				388	delay(100);
				389
				390	/* flush inodes and push all remaining buffers out to disk */
				391	xfs_quiesce_fs(mp);
				392
Felix Blyakher	5e10657	2009-01-22 21:34:05 -0600	[diff] [blame]	393	/*
				394	* Just warn here till VFS can correctly support
				395	* read-only remount without racing.
				396	*/
				397	WARN_ON(atomic_read(&mp->m_active_trans) != 0);
David Chinner	76bf105	2008-10-30 17:16:21 +1100	[diff] [blame]	398
				399	/* Push the superblock and write an unmount record */
				400	error = xfs_log_sbcount(mp, 1);
				401	if (error)
				402	xfs_fs_cmn_err(CE_WARN, mp,
				403	"xfs_attr_quiesce: failed to log sb changes. "
				404	"Frozen image may not be consistent.");
				405	xfs_log_unmount_write(mp);
				406	xfs_unmountfs_writesb(mp);
				407	}
				408
David Chinner	e9f1c6e	2008-10-30 17:15:50 +1100	[diff] [blame]	409	/*
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	410	* Enqueue a work item to be picked up by the vfs xfssyncd thread.
				411	* Doing this has two advantages:
				412	* - It saves on stack space, which is tight in certain situations
				413	* - It can be used (with care) as a mechanism to avoid deadlocks.
				414	* Flushing while allocating in a full filesystem requires both.
				415	*/
				416	STATIC void
				417	xfs_syncd_queue_work(
				418	struct xfs_mount *mp,
				419	void *data,
Dave Chinner	e43afd7	2009-04-06 18:47:27 +0200	[diff] [blame]	420	void (syncer)(struct xfs_mount , void *),
				421	struct completion *completion)
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	422	{
Dave Chinner	a8d770d	2009-04-06 18:44:54 +0200	[diff] [blame]	423	struct xfs_sync_work *work;
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	424
Dave Chinner	a8d770d	2009-04-06 18:44:54 +0200	[diff] [blame]	425	work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	426	INIT_LIST_HEAD(&work->w_list);
				427	work->w_syncer = syncer;
				428	work->w_data = data;
				429	work->w_mount = mp;
Dave Chinner	e43afd7	2009-04-06 18:47:27 +0200	[diff] [blame]	430	work->w_completion = completion;
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	431	spin_lock(&mp->m_sync_lock);
				432	list_add_tail(&work->w_list, &mp->m_sync_list);
				433	spin_unlock(&mp->m_sync_lock);
				434	wake_up_process(mp->m_sync_task);
				435	}
				436
				437	/*
				438	* Flush delayed allocate data, attempting to free up reserved space
				439	* from existing allocations. At this point a new allocation attempt
				440	* has failed with ENOSPC and we are in the process of scratching our
				441	* heads, looking about for more room...
				442	*/
				443	STATIC void
Dave Chinner	a8d770d	2009-04-06 18:44:54 +0200	[diff] [blame]	444	xfs_flush_inodes_work(
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	445	struct xfs_mount *mp,
				446	void *arg)
				447	{
				448	struct inode *inode = arg;
Dave Chinner	a8d770d	2009-04-06 18:44:54 +0200	[diff] [blame]	449	xfs_sync_inodes(mp, SYNC_DELWRI \| SYNC_TRYLOCK);
				450	xfs_sync_inodes(mp, SYNC_DELWRI \| SYNC_TRYLOCK \| SYNC_IOWAIT);
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	451	iput(inode);
				452	}
				453
				454	void
Dave Chinner	a8d770d	2009-04-06 18:44:54 +0200	[diff] [blame]	455	xfs_flush_inodes(
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	456	xfs_inode_t *ip)
				457	{
				458	struct inode *inode = VFS_I(ip);
Dave Chinner	e43afd7	2009-04-06 18:47:27 +0200	[diff] [blame]	459	DECLARE_COMPLETION_ONSTACK(completion);
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	460
				461	igrab(inode);
Dave Chinner	e43afd7	2009-04-06 18:47:27 +0200	[diff] [blame]	462	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
				463	wait_for_completion(&completion);
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	464	xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE\|XFS_LOG_SYNC);
				465	}
				466
David Chinner	aacaa88	2008-10-30 17:15:29 +1100	[diff] [blame]	467	/*
				468	* Every sync period we need to unpin all items, reclaim inodes, sync
				469	* quota and write out the superblock. We might need to cover the log
				470	* to indicate it is idle.
				471	*/
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	472	STATIC void
				473	xfs_sync_worker(
				474	struct xfs_mount *mp,
				475	void *unused)
				476	{
				477	int error;
				478
David Chinner	aacaa88	2008-10-30 17:15:29 +1100	[diff] [blame]	479	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
				480	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
David Chinner	1dc3318	2008-10-30 17:37:15 +1100	[diff] [blame]	481	xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
David Chinner	aacaa88	2008-10-30 17:15:29 +1100	[diff] [blame]	482	/* dgc: errors ignored here */
Christoph Hellwig	7d09525	2009-06-08 15:33:32 +0200	[diff] [blame]	483	error = xfs_qm_sync(mp, SYNC_BDFLUSH);
David Chinner	aacaa88	2008-10-30 17:15:29 +1100	[diff] [blame]	484	error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
				485	if (xfs_log_need_covered(mp))
				486	error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
				487	}
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	488	mp->m_sync_seq++;
				489	wake_up(&mp->m_wait_single_sync_task);
				490	}
				491
				492	STATIC int
				493	xfssyncd(
				494	void *arg)
				495	{
				496	struct xfs_mount *mp = arg;
				497	long timeleft;
Dave Chinner	a8d770d	2009-04-06 18:44:54 +0200	[diff] [blame]	498	xfs_sync_work_t work, n;
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	499	LIST_HEAD (tmp);
				500
				501	set_freezable();
				502	timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
				503	for (;;) {
				504	timeleft = schedule_timeout_interruptible(timeleft);
				505	/* swsusp */
				506	try_to_freeze();
				507	if (kthread_should_stop() && list_empty(&mp->m_sync_list))
				508	break;
				509
				510	spin_lock(&mp->m_sync_lock);
				511	/*
				512	* We can get woken by laptop mode, to do a sync -
				513	* that's the (only!) case where the list would be
				514	* empty with time remaining.
				515	*/
				516	if (!timeleft \|\| list_empty(&mp->m_sync_list)) {
				517	if (!timeleft)
				518	timeleft = xfs_syncd_centisecs *
				519	msecs_to_jiffies(10);
				520	INIT_LIST_HEAD(&mp->m_sync_work.w_list);
				521	list_add_tail(&mp->m_sync_work.w_list,
				522	&mp->m_sync_list);
				523	}
				524	list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
				525	list_move(&work->w_list, &tmp);
				526	spin_unlock(&mp->m_sync_lock);
				527
				528	list_for_each_entry_safe(work, n, &tmp, w_list) {
				529	(*work->w_syncer)(mp, work->w_data);
				530	list_del(&work->w_list);
				531	if (work == &mp->m_sync_work)
				532	continue;
Dave Chinner	e43afd7	2009-04-06 18:47:27 +0200	[diff] [blame]	533	if (work->w_completion)
				534	complete(work->w_completion);
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	535	kmem_free(work);
				536	}
				537	}
				538
				539	return 0;
				540	}
				541
				542	int
				543	xfs_syncd_init(
				544	struct xfs_mount *mp)
				545	{
				546	mp->m_sync_work.w_syncer = xfs_sync_worker;
				547	mp->m_sync_work.w_mount = mp;
Dave Chinner	e43afd7	2009-04-06 18:47:27 +0200	[diff] [blame]	548	mp->m_sync_work.w_completion = NULL;
David Chinner	a167b17	2008-10-30 17:06:18 +1100	[diff] [blame]	549	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
				550	if (IS_ERR(mp->m_sync_task))
				551	return -PTR_ERR(mp->m_sync_task);
				552	return 0;
				553	}
				554
				555	void
				556	xfs_syncd_stop(
				557	struct xfs_mount *mp)
				558	{
				559	kthread_stop(mp->m_sync_task);
				560	}
				561
David Chinner	fce08f2	2008-10-30 17:37:03 +1100	[diff] [blame]	562	int
David Chinner	1dc3318	2008-10-30 17:37:15 +1100	[diff] [blame]	563	xfs_reclaim_inode(
David Chinner	fce08f2	2008-10-30 17:37:03 +1100	[diff] [blame]	564	xfs_inode_t *ip,
				565	int locked,
				566	int sync_mode)
				567	{
				568	xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
				569
				570	/* The hash lock here protects a thread in xfs_iget_core from
				571	* racing with us on linking the inode back with a vnode.
				572	* Once we have the XFS_IRECLAIM flag set it will not touch
				573	* us.
				574	*/
				575	write_lock(&pag->pag_ici_lock);
				576	spin_lock(&ip->i_flags_lock);
				577	if (__xfs_iflags_test(ip, XFS_IRECLAIM) \|\|
				578	!__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
				579	spin_unlock(&ip->i_flags_lock);
				580	write_unlock(&pag->pag_ici_lock);
				581	if (locked) {
				582	xfs_ifunlock(ip);
				583	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				584	}
				585	return 1;
				586	}
				587	__xfs_iflags_set(ip, XFS_IRECLAIM);
				588	spin_unlock(&ip->i_flags_lock);
				589	write_unlock(&pag->pag_ici_lock);
				590	xfs_put_perag(ip->i_mount, pag);
				591
				592	/*
				593	* If the inode is still dirty, then flush it out. If the inode
				594	* is not in the AIL, then it will be OK to flush it delwri as
				595	* long as xfs_iflush() does not keep any references to the inode.
				596	* We leave that decision up to xfs_iflush() since it has the
				597	* knowledge of whether it's OK to simply do a delwri flush of
				598	* the inode or whether we need to wait until the inode is
				599	* pulled from the AIL.
				600	* We get the flush lock regardless, though, just to make sure
				601	* we don't free it while it is being flushed.
				602	*/
				603	if (!locked) {
				604	xfs_ilock(ip, XFS_ILOCK_EXCL);
				605	xfs_iflock(ip);
				606	}
				607
				608	/*
				609	* In the case of a forced shutdown we rely on xfs_iflush() to
				610	* wait for the inode to be unpinned before returning an error.
				611	*/
				612	if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
				613	/* synchronize with xfs_iflush_done */
				614	xfs_iflock(ip);
				615	xfs_ifunlock(ip);
				616	}
				617
				618	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				619	xfs_ireclaim(ip);
				620	return 0;
				621	}
				622
David Chinner	1165451	2008-10-30 17:37:49 +1100	[diff] [blame]	623	/*
				624	* We set the inode flag atomically with the radix tree tag.
				625	* Once we get tag lookups on the radix tree, this inode flag
				626	* can go away.
				627	*/
David Chinner	396beb8	2008-10-30 17:37:26 +1100	[diff] [blame]	628	void
				629	xfs_inode_set_reclaim_tag(
				630	xfs_inode_t *ip)
				631	{
				632	xfs_mount_t *mp = ip->i_mount;
				633	xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
				634
				635	read_lock(&pag->pag_ici_lock);
				636	spin_lock(&ip->i_flags_lock);
				637	radix_tree_tag_set(&pag->pag_ici_root,
				638	XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
David Chinner	1165451	2008-10-30 17:37:49 +1100	[diff] [blame]	639	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
David Chinner	396beb8	2008-10-30 17:37:26 +1100	[diff] [blame]	640	spin_unlock(&ip->i_flags_lock);
				641	read_unlock(&pag->pag_ici_lock);
				642	xfs_put_perag(mp, pag);
				643	}
				644
				645	void
				646	__xfs_inode_clear_reclaim_tag(
				647	xfs_mount_t *mp,
				648	xfs_perag_t *pag,
				649	xfs_inode_t *ip)
				650	{
				651	radix_tree_tag_clear(&pag->pag_ici_root,
				652	XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
				653	}
				654
				655	void
				656	xfs_inode_clear_reclaim_tag(
				657	xfs_inode_t *ip)
				658	{
				659	xfs_mount_t *mp = ip->i_mount;
				660	xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
				661
				662	read_lock(&pag->pag_ici_lock);
				663	spin_lock(&ip->i_flags_lock);
				664	__xfs_inode_clear_reclaim_tag(mp, pag, ip);
				665	spin_unlock(&ip->i_flags_lock);
				666	read_unlock(&pag->pag_ici_lock);
				667	xfs_put_perag(mp, pag);
				668	}
				669
David Chinner	7a3be02	2008-10-30 17:37:37 +1100	[diff] [blame]	670
				671	STATIC void
				672	xfs_reclaim_inodes_ag(
				673	xfs_mount_t *mp,
				674	int ag,
				675	int noblock,
				676	int mode)
				677	{
				678	xfs_inode_t *ip = NULL;
				679	xfs_perag_t *pag = &mp->m_perag[ag];
				680	int nr_found;
David Chinner	8c38ab0	2008-10-30 17:38:00 +1100	[diff] [blame]	681	uint32_t first_index;
David Chinner	7a3be02	2008-10-30 17:37:37 +1100	[diff] [blame]	682	int skipped;
				683
				684	restart:
				685	first_index = 0;
				686	skipped = 0;
				687	do {
				688	/*
				689	* use a gang lookup to find the next inode in the tree
				690	* as the tree is sparse and a gang lookup walks to find
				691	* the number of objects requested.
				692	*/
				693	read_lock(&pag->pag_ici_lock);
				694	nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
				695	(void**)&ip, first_index, 1,
				696	XFS_ICI_RECLAIM_TAG);
				697
				698	if (!nr_found) {
				699	read_unlock(&pag->pag_ici_lock);
				700	break;
				701	}
				702
David Chinner	8c38ab0	2008-10-30 17:38:00 +1100	[diff] [blame]	703	/*
				704	* Update the index for the next lookup. Catch overflows
				705	* into the next AG range which can occur if we have inodes
				706	* in the last block of the AG and we are currently
				707	* pointing to the last inode.
				708	*/
David Chinner	7a3be02	2008-10-30 17:37:37 +1100	[diff] [blame]	709	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
David Chinner	8c38ab0	2008-10-30 17:38:00 +1100	[diff] [blame]	710	if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
				711	read_unlock(&pag->pag_ici_lock);
				712	break;
				713	}
David Chinner	7a3be02	2008-10-30 17:37:37 +1100	[diff] [blame]	714
David Chinner	7a3be02	2008-10-30 17:37:37 +1100	[diff] [blame]	715	/* ignore if already under reclaim */
				716	if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
				717	read_unlock(&pag->pag_ici_lock);
				718	continue;
				719	}
				720
				721	if (noblock) {
				722	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
				723	read_unlock(&pag->pag_ici_lock);
				724	continue;
				725	}
				726	if (xfs_ipincount(ip) \|\|
				727	!xfs_iflock_nowait(ip)) {
				728	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				729	read_unlock(&pag->pag_ici_lock);
				730	continue;
				731	}
				732	}
				733	read_unlock(&pag->pag_ici_lock);
				734
				735	/*
				736	* hmmm - this is an inode already in reclaim. Do
				737	* we even bother catching it here?
				738	*/
				739	if (xfs_reclaim_inode(ip, noblock, mode))
				740	skipped++;
				741	} while (nr_found);
				742
				743	if (skipped) {
				744	delay(1);
				745	goto restart;
				746	}
				747	return;
				748
				749	}
				750
David Chinner	fce08f2	2008-10-30 17:37:03 +1100	[diff] [blame]	751	int
David Chinner	1dc3318	2008-10-30 17:37:15 +1100	[diff] [blame]	752	xfs_reclaim_inodes(
David Chinner	fce08f2	2008-10-30 17:37:03 +1100	[diff] [blame]	753	xfs_mount_t *mp,
				754	int noblock,
				755	int mode)
				756	{
David Chinner	7a3be02	2008-10-30 17:37:37 +1100	[diff] [blame]	757	int i;
David Chinner	fce08f2	2008-10-30 17:37:03 +1100	[diff] [blame]	758
David Chinner	7a3be02	2008-10-30 17:37:37 +1100	[diff] [blame]	759	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
				760	if (!mp->m_perag[i].pag_ici_init)
				761	continue;
				762	xfs_reclaim_inodes_ag(mp, i, noblock, mode);
David Chinner	fce08f2	2008-10-30 17:37:03 +1100	[diff] [blame]	763	}
David Chinner	fce08f2	2008-10-30 17:37:03 +1100	[diff] [blame]	764	return 0;
				765	}
				766
				767