Blame - fs/xfs/linux-2.6/xfs_sync.c - android_kernel_oneplus_msm8996

blob: c765eb2a8dca05bbd90d7fb588096142bd8db66a [file] [log] [blame]

David Chinner	fe4fa4b	2008-10-30 17:06:08 +1100	[diff] [blame^]	1	/*
				2	* Copyright (c) 2000-2005 Silicon Graphics, Inc.
				3	* All Rights Reserved.
				4	*
				5	* This program is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU General Public License as
				7	* published by the Free Software Foundation.
				8	*
				9	* This program is distributed in the hope that it would be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
				13	*
				14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write the Free Software Foundation,
				16	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
				17	*/
				18	#include "xfs.h"
				19	#include "xfs_fs.h"
				20	#include "xfs_types.h"
				21	#include "xfs_bit.h"
				22	#include "xfs_log.h"
				23	#include "xfs_inum.h"
				24	#include "xfs_trans.h"
				25	#include "xfs_sb.h"
				26	#include "xfs_ag.h"
				27	#include "xfs_dir2.h"
				28	#include "xfs_dmapi.h"
				29	#include "xfs_mount.h"
				30	#include "xfs_bmap_btree.h"
				31	#include "xfs_alloc_btree.h"
				32	#include "xfs_ialloc_btree.h"
				33	#include "xfs_btree.h"
				34	#include "xfs_dir2_sf.h"
				35	#include "xfs_attr_sf.h"
				36	#include "xfs_inode.h"
				37	#include "xfs_dinode.h"
				38	#include "xfs_error.h"
				39	#include "xfs_mru_cache.h"
				40	#include "xfs_filestream.h"
				41	#include "xfs_vnodeops.h"
				42	#include "xfs_utils.h"
				43	#include "xfs_buf_item.h"
				44	#include "xfs_inode_item.h"
				45	#include "xfs_rw.h"
				46
				47	/*
				48	* xfs_sync flushes any pending I/O to file system vfsp.
				49	*
				50	* This routine is called by vfs_sync() to make sure that things make it
				51	* out to disk eventually, on sync() system calls to flush out everything,
				52	* and when the file system is unmounted. For the vfs_sync() case, all
				53	* we really need to do is sync out the log to make all of our meta-data
				54	* updates permanent (except for timestamps). For calls from pflushd(),
				55	* dirty pages are kept moving by calling pdflush() on the inodes
				56	* containing them. We also flush the inodes that we can lock without
				57	* sleeping and the superblock if we can lock it without sleeping from
				58	* vfs_sync() so that items at the tail of the log are always moving out.
				59	*
				60	* Flags:
				61	* SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want
				62	* to sleep if we can help it. All we really need
				63	* to do is ensure that the log is synced at least
				64	* periodically. We also push the inodes and
				65	* superblock if we can lock them without sleeping
				66	* and they are not pinned.
				67	* SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not
				68	* set, then we really want to lock each inode and flush
				69	* it.
				70	* SYNC_WAIT - All the flushes that take place in this call should
				71	* be synchronous.
				72	* SYNC_DELWRI - This tells us to push dirty pages associated with
				73	* inodes. SYNC_WAIT and SYNC_BDFLUSH are used to
				74	* determine if they should be flushed sync, async, or
				75	* delwri.
				76	* SYNC_CLOSE - This flag is passed when the system is being
				77	* unmounted. We should sync and invalidate everything.
				78	* SYNC_FSDATA - This indicates that the caller would like to make
				79	* sure the superblock is safe on disk. We can ensure
				80	* this by simply making sure the log gets flushed
				81	* if SYNC_BDFLUSH is set, and by actually writing it
				82	* out otherwise.
				83	* SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete
				84	* before we return (including direct I/O). Forms the drain
				85	* side of the write barrier needed to safely quiesce the
				86	* filesystem.
				87	*
				88	*/
				89	int
				90	xfs_sync(
				91	xfs_mount_t *mp,
				92	int flags)
				93	{
				94	int error;
				95
				96	/*
				97	* Get the Quota Manager to flush the dquots.
				98	*
				99	* If XFS quota support is not enabled or this filesystem
				100	* instance does not use quotas XFS_QM_DQSYNC will always
				101	* return zero.
				102	*/
				103	error = XFS_QM_DQSYNC(mp, flags);
				104	if (error) {
				105	/*
				106	* If we got an IO error, we will be shutting down.
				107	* So, there's nothing more for us to do here.
				108	*/
				109	ASSERT(error != EIO \|\| XFS_FORCED_SHUTDOWN(mp));
				110	if (XFS_FORCED_SHUTDOWN(mp))
				111	return XFS_ERROR(error);
				112	}
				113
				114	if (flags & SYNC_IOWAIT)
				115	xfs_filestream_flush(mp);
				116
				117	return xfs_syncsub(mp, flags, NULL);
				118	}
				119
				120	/*
				121	* xfs_sync_inodes - xfs sync routine for internal use
				122	*
					* Walks the mount's inode list, starting at mp->m_inodes and following
					* i_mnext until the walk arrives back at mp->m_inodes, and for each
					* inode flushes dirty pages and/or the inode itself as directed by flags.
					*
				123	* This routine supports the flags defined for the generic vfs_sync
				124	* interface as explained above under xfs_sync, except that SYNC_BDFLUSH
					* must not be set (asserted on entry).
				125	*
					* Whenever the mount inode lock has to be dropped, a marker record
					* (ipointer) is linked into the list after the current inode so that the
					* walk can resume from the same place; see IPOINTER_INSERT and
					* IPOINTER_REMOVE below.
					*
					* mp       - mount whose inodes are to be synced
					* flags    - SYNC_* flags
					* bypassed - if non-NULL, zeroed on entry and then incremented for every
					*            inode whose flush lock could not be taken without sleeping
					*            in the non-SYNC_WAIT attribute flush case
					*
					* Returns 0 straight away for a read-only mount, and 0 if a forced
					* shutdown is noticed during the walk while SYNC_CLOSE is not set.
					* Returns EFSCORRUPTED as soon as a flush reports it. Otherwise returns
					* the last error recorded during the walk (0 if there was none).
				126	*/
				127	int
				128	xfs_sync_inodes(
				129	xfs_mount_t *mp,
				130	int flags,
				131	int *bypassed)
				132	{
				133	xfs_inode_t *ip = NULL;
				134	struct inode *vp = NULL;
				135	int error;
				136	int last_error;
				137	uint64_t fflag; /* flags handed to xfs_flush_pages() */
				138	uint lock_flags; /* inode locks taken for the current inode */
				139	uint base_lock_flags; /* per-call template for lock_flags */
				140	boolean_t mount_locked; /* B_TRUE while we hold the mount inode lock */
				141	boolean_t vnode_refed; /* B_TRUE while we hold a vn_grab() reference */
				142	int preempt; /* loop completions since the marker was last inserted */
				143	xfs_iptr_t *ipointer; /* marker record linked into the inode list */
					/*
					* ipointer_in only exists in DEBUG builds; it is referenced solely by
					* the ASSERT()s below and by IPOINTER_SET/IPOINTER_CLR.
					*/
				144	#ifdef DEBUG
				145	boolean_t ipointer_in = B_FALSE;
				146
				147	#define IPOINTER_SET ipointer_in = B_TRUE
				148	#define IPOINTER_CLR ipointer_in = B_FALSE
				149	#else
				150	#define IPOINTER_SET
				151	#define IPOINTER_CLR
				152	#endif
				153
				154
				155	/* Insert a marker record into the inode list after inode ip. The list
				156	* must be locked when this is called. After the call the list will no
				157	* longer be locked.
					*
					* Besides linking the marker in, this uses the enclosing function's
					* locals directly: it resets preempt to 0, drops the mount inode lock
					* and sets mount_locked to B_FALSE.
				158	*/
				159	#define IPOINTER_INSERT(ip, mp) { \
				160	ASSERT(ipointer_in == B_FALSE); \
				161	ipointer->ip_mnext = ip->i_mnext; \
				162	ipointer->ip_mprev = ip; \
				163	ip->i_mnext = (xfs_inode_t *)ipointer; \
				164	ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \
				165	preempt = 0; \
				166	XFS_MOUNT_IUNLOCK(mp); \
				167	mount_locked = B_FALSE; \
				168	IPOINTER_SET; \
				169	}
				170
				171	/* Remove the marker from the inode list. If the marker was the only item
				172	* in the list then there are no remaining inodes and we should zero out
				173	* the whole list. If we are the current head of the list then move the head
				174	* past us.
					*
					* On completion ip is the inode that followed the marker, or NULL if the
					* list is now empty. The macro does not take the mount inode lock itself;
					* every use in this function acquires it with XFS_MOUNT_ILOCK() first.
				175	*/
				176	#define IPOINTER_REMOVE(ip, mp) { \
				177	ASSERT(ipointer_in == B_TRUE); \
				178	if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \
				179	ip = ipointer->ip_mnext; \
				180	ip->i_mprev = ipointer->ip_mprev; \
				181	ipointer->ip_mprev->i_mnext = ip; \
				182	if (mp->m_inodes == (xfs_inode_t *)ipointer) { \
				183	mp->m_inodes = ip; \
				184	} \
				185	} else { \
				186	ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \
				187	mp->m_inodes = NULL; \
				188	ip = NULL; \
				189	} \
				190	IPOINTER_CLR; \
				191	}
				192
					/*
					* The mount inode lock is dropped voluntarily whenever
					* (++preempt & XFS_PREEMPT_MASK) becomes 0, i.e. after 128 consecutive
					* loop completions that did not already drop it.
					*/
				193	#define XFS_PREEMPT_MASK 0x7f
				194
				195	ASSERT(!(flags & SYNC_BDFLUSH));
				196
				197	if (bypassed)
				198	*bypassed = 0;
					/* Nothing to flush on a read-only mount. */
				199	if (mp->m_flags & XFS_MOUNT_RDONLY)
				200	return 0;
				201	error = 0;
				202	last_error = 0;
				203	preempt = 0;
				204
				205	/* Allocate a reference marker */
					/*
					* The marker is zero-filled, so when another walker meets it in the list
					* and reads it as an inode it sees i_mount == NULL (see the marker check
					* in the loop). NOTE(review): this presumes xfs_iptr_t mirrors the
					* leading fields of xfs_inode_t - confirm against the type definitions.
					*/
				206	ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP);
				207
				208	fflag = XFS_B_ASYNC; /* default is don't wait */
				209	if (flags & SYNC_DELWRI)
				210	fflag = XFS_B_DELWRI;
				211	if (flags & SYNC_WAIT)
				212	fflag = 0; /* synchronous overrides all */
				213
				214	base_lock_flags = XFS_ILOCK_SHARED;
				215	if (flags & (SYNC_DELWRI \| SYNC_CLOSE)) {
				216	/*
				217	* We need the I/O lock if we're going to call any of
				218	* the flush/inval routines.
				219	*/
				220	base_lock_flags \|= XFS_IOLOCK_SHARED;
				221	}
				222
				223	XFS_MOUNT_ILOCK(mp);
				224
				225	ip = mp->m_inodes;
				226
				227	mount_locked = B_TRUE;
				228	vnode_refed = B_FALSE;
				229
				230	IPOINTER_CLR;
				231
					/*
					* Each pass handles one list entry. Every "continue" below jumps to the
					* loop condition, so ip must already point at the next entry to examine
					* (or be NULL) before continuing.
					*/
				232	do {
				233	ASSERT(ipointer_in == B_FALSE);
				234	ASSERT(vnode_refed == B_FALSE);
				235
				236	lock_flags = base_lock_flags;
				237
				238	/*
				239	* There were no inodes in the list, just break out
				240	* of the loop.
				241	*/
				242	if (ip == NULL) {
				243	break;
				244	}
				245
				246	/*
				247	* We found another sync thread marker - skip it
				248	*/
				249	if (ip->i_mount == NULL) {
				250	ip = ip->i_mnext;
				251	continue;
				252	}
				253
				254	vp = VFS_I(ip);
				255
				256	/*
				257	* If the vnode is gone then this is being torn down,
				258	* call reclaim if it is flushed, else let regular flush
				259	* code deal with it later in the loop.
				260	*/
				261
				262	if (vp == NULL) {
				263	/* Skip ones already in reclaim */
				264	if (ip->i_flags & XFS_IRECLAIM) {
				265	ip = ip->i_mnext;
				266	continue;
				267	}
					/* Could not get the inode lock without sleeping: skip it. */
				268	if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
				269	ip = ip->i_mnext;
					/*
					* Unpinned and flush lock obtained: drop the mount lock behind a
					* marker, finish the reclaim, then pick the walk up at the marker.
					*/
				270	} else if ((xfs_ipincount(ip) == 0) &&
				271	xfs_iflock_nowait(ip)) {
				272	IPOINTER_INSERT(ip, mp);
				273
				274	xfs_finish_reclaim(ip, 1,
				275	XFS_IFLUSH_DELWRI_ELSE_ASYNC);
				276
				277	XFS_MOUNT_ILOCK(mp);
				278	mount_locked = B_TRUE;
				279	IPOINTER_REMOVE(ip, mp);
					/* Pinned or already being flushed: unlock and move on. */
				280	} else {
				281	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				282	ip = ip->i_mnext;
				283	}
				284	continue;
				285	}
				286
				287	if (VN_BAD(vp)) {
				288	ip = ip->i_mnext;
				289	continue;
				290	}
				291
					/*
					* After a forced shutdown only a SYNC_CLOSE caller carries on; anyone
					* else stops here and reports success.
					*/
				292	if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) {
				293	XFS_MOUNT_IUNLOCK(mp);
				294	kmem_free(ipointer);
				295	return 0;
				296	}
				297
				298	/*
				299	* Try to lock without sleeping. We're out of order with
				300	* the inode list lock here, so if we fail we need to drop
				301	* the mount lock and try again. If we're called from
				302	* bdflush() here, then don't bother.
				303	*
				304	* The inode lock here actually coordinates with the
				305	* almost spurious inode lock in xfs_ireclaim() to prevent
				306	* the vnode we handle here without a reference from
				307	* being freed while we reference it. If we lock the inode
				308	* while it's on the mount list here, then the spurious inode
				309	* lock in xfs_ireclaim() after the inode is pulled from
				310	* the mount list will sleep until we release it here.
				311	* This keeps the vnode from being freed while we reference
				312	* it.
				313	*/
				314	if (xfs_ilock_nowait(ip, lock_flags) == 0) {
					/*
					* NOTE(review): vp was already found to be non-NULL above and has
					* not been reassigned since, so this test looks unreachable.
					*/
				315	if (vp == NULL) {
				316	ip = ip->i_mnext;
				317	continue;
				318	}
				319
					/*
					* Take a vnode reference before dropping the mount lock; if
					* vn_grab() returns NULL, skip this inode.
					*/
				320	vp = vn_grab(vp);
				321	if (vp == NULL) {
				322	ip = ip->i_mnext;
				323	continue;
				324	}
				325
					/* Mount lock dropped via the marker, so a sleeping lock is now used. */
				326	IPOINTER_INSERT(ip, mp);
				327	xfs_ilock(ip, lock_flags);
				328
				329	ASSERT(vp == VFS_I(ip));
				330	ASSERT(ip->i_mount == mp);
				331
				332	vnode_refed = B_TRUE;
				333	}
				334
				335	/* From here on in the loop we may have a marker record
				336	* in the inode list.
				337	*/
				338
				339	/*
				340	* If we have to flush data or wait for I/O completion
				341	* we need to drop the ilock that we currently hold.
				342	* If we need to drop the lock, insert a marker if we
				343	* have not already done so.
				344	*/
				345	if ((flags & (SYNC_CLOSE\|SYNC_IOWAIT)) \|\|
				346	((flags & SYNC_DELWRI) && VN_DIRTY(vp))) {
				347	if (mount_locked) {
				348	IPOINTER_INSERT(ip, mp);
				349	}
					/*
					* Only XFS_ILOCK_SHARED is released here; any other bits in
					* lock_flags stay held across the page flush.
					*/
				350	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				351
				352	if (flags & SYNC_CLOSE) {
				353	/* Shutdown case. Flush and invalidate. */
				354	if (XFS_FORCED_SHUTDOWN(mp))
				355	xfs_tosspages(ip, 0, -1,
				356	FI_REMAPF);
				357	else
				358	error = xfs_flushinval_pages(ip,
				359	0, -1, FI_REMAPF);
				360	} else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) {
				361	error = xfs_flush_pages(ip, 0,
				362	-1, fflag, FI_NONE);
				363	}
				364
				365	/*
				366	* When freezing, we need to wait to ensure all I/O (including direct
				367	* I/O) is complete to ensure no further data modification can take
				368	* place after this point
				369	*/
				370	if (flags & SYNC_IOWAIT)
				371	vn_iowait(ip);
				372
				373	xfs_ilock(ip, XFS_ILOCK_SHARED);
				374	}
				375
					/*
					* Attribute flush: only if the inode core is flagged for update or
					* its log item has fields logged. An error from the page flush
					* above is overwritten by the xfs_iflush() result if a flush runs.
					*/
				376	if ((flags & SYNC_ATTR) &&
				377	(ip->i_update_core \|\|
				378	(ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) {
				379	if (mount_locked)
				380	IPOINTER_INSERT(ip, mp);
				381
				382	if (flags & SYNC_WAIT) {
				383	xfs_iflock(ip);
				384	error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
				385
				386	/*
				387	* If we can't acquire the flush lock, then the inode
				388	* is already being flushed so don't bother waiting.
				389	*
				390	* If we can lock it then do a delwri flush so we can
				391	* combine multiple inode flushes in each disk write.
				392	*/
				393	} else if (xfs_iflock_nowait(ip)) {
				394	error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
				395	} else if (bypassed) {
				396	(*bypassed)++;
				397	}
				398	}
				399
					/*
					* NOTE(review): lock_flags always includes XFS_ILOCK_SHARED at this
					* point, so this test presumably always succeeds.
					*/
				400	if (lock_flags != 0) {
				401	xfs_iunlock(ip, lock_flags);
				402	}
				403
				404	if (vnode_refed) {
				405	/*
				406	* If we had to take a reference on the vnode
				407	* above, then wait until after we've unlocked
				408	* the inode to release the reference. This is
				409	* because we can be already holding the inode
				410	* lock when IRELE() calls xfs_inactive().
				411	*
				412	* Make sure to drop the mount lock before calling
				413	* IRELE() so that we don't trip over ourselves if
				414	* we have to go for the mount lock again in the
				415	* inactive code.
				416	*/
				417	if (mount_locked) {
				418	IPOINTER_INSERT(ip, mp);
				419	}
				420
				421	IRELE(ip);
				422
				423	vnode_refed = B_FALSE;
				424	}
				425
					/*
					* error is not cleared between iterations, so last_error simply
					* tracks the most recent non-zero value seen.
					*/
				426	if (error) {
				427	last_error = error;
				428	}
				429
				430	/*
				431	* bail out if the filesystem is corrupted.
				432	*/
				433	if (error == EFSCORRUPTED) {
					/* Unlink the marker, if one is in the list, before freeing it. */
				434	if (!mount_locked) {
				435	XFS_MOUNT_ILOCK(mp);
				436	IPOINTER_REMOVE(ip, mp);
				437	}
				438	XFS_MOUNT_IUNLOCK(mp);
				439	ASSERT(ipointer_in == B_FALSE);
				440	kmem_free(ipointer);
				441	return XFS_ERROR(error);
				442	}
				443
				444	/* Let other threads have a chance at the mount lock
				445	* if we have looped many times without dropping the
				446	* lock.
				447	*/
				448	if ((++preempt & XFS_PREEMPT_MASK) == 0) {
				449	if (mount_locked) {
				450	IPOINTER_INSERT(ip, mp);
				451	}
				452	}
				453
					/*
					* The mount lock was dropped at some point during this pass: retake
					* it and unlink the marker. IPOINTER_REMOVE leaves ip at the entry
					* that followed the marker (or NULL), so do not advance again.
					*/
				454	if (mount_locked == B_FALSE) {
				455	XFS_MOUNT_ILOCK(mp);
				456	mount_locked = B_TRUE;
				457	IPOINTER_REMOVE(ip, mp);
				458	continue;
				459	}
				460
				461	ASSERT(ipointer_in == B_FALSE);
				462	ip = ip->i_mnext;
				463
				464	} while (ip != mp->m_inodes);
				465
				466	XFS_MOUNT_IUNLOCK(mp);
				467
				468	ASSERT(ipointer_in == B_FALSE);
				469
				470	kmem_free(ipointer);
				471	return XFS_ERROR(last_error);
				472	}
				473
				474	/*
				475	* xfs sync routine for internal use
				476	*
				477	* This routine supports all of the flags defined for the generic vfs_sync
				478	* interface as explained above under xfs_sync.
				479	*
				480	*/
				481	int
				482	xfs_syncsub(
				483	xfs_mount_t *mp,
				484	int flags,
				485	int *bypassed)
				486	{
				487	int error = 0;
				488	int last_error = 0;
				489	uint log_flags = XFS_LOG_FORCE;
				490	xfs_buf_t *bp;
				491	xfs_buf_log_item_t *bip;
				492
				493	/*
				494	* Sync out the log. This ensures that the log is periodically
				495	* flushed even if there is not enough activity to fill it up.
				496	*/
				497	if (flags & SYNC_WAIT)
				498	log_flags \|= XFS_LOG_SYNC;
				499
				500	xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
				501
				502	if (flags & (SYNC_ATTR\|SYNC_DELWRI)) {
				503	if (flags & SYNC_BDFLUSH)
				504	xfs_finish_reclaim_all(mp, 1);
				505	else
				506	error = xfs_sync_inodes(mp, flags, bypassed);
				507	}
				508
				509	/*
				510	* Flushing out dirty data above probably generated more
				511	* log activity, so if this isn't vfs_sync() then flush
				512	* the log again.
				513	*/
				514	if (flags & SYNC_DELWRI) {
				515	xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
				516	}
				517
				518	if (flags & SYNC_FSDATA) {
				519	/*
				520	* If this is vfs_sync() then only sync the superblock
				521	* if we can lock it without sleeping and it is not pinned.
				522	*/
				523	if (flags & SYNC_BDFLUSH) {
				524	bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
				525	if (bp != NULL) {
				526	bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
				527	if ((bip != NULL) &&
				528	xfs_buf_item_dirty(bip)) {
				529	if (!(XFS_BUF_ISPINNED(bp))) {
				530	XFS_BUF_ASYNC(bp);
				531	error = xfs_bwrite(mp, bp);
				532	} else {
				533	xfs_buf_relse(bp);
				534	}
				535	} else {
				536	xfs_buf_relse(bp);
				537	}
				538	}
				539	} else {
				540	bp = xfs_getsb(mp, 0);
				541	/*
				542	* If the buffer is pinned then push on the log so
				543	* we won't get stuck waiting in the write for
				544	* someone, maybe ourselves, to flush the log.
				545	* Even though we just pushed the log above, we
				546	* did not have the superblock buffer locked at
				547	* that point so it can become pinned in between
				548	* there and here.
				549	*/
				550	if (XFS_BUF_ISPINNED(bp))
				551	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
				552	if (flags & SYNC_WAIT)
				553	XFS_BUF_UNASYNC(bp);
				554	else
				555	XFS_BUF_ASYNC(bp);
				556	error = xfs_bwrite(mp, bp);
				557	}
				558	if (error) {
				559	last_error = error;
				560	}
				561	}
				562
				563	/*
				564	* Now check to see if the log needs a "dummy" transaction.
				565	*/
				566	if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) {
				567	xfs_trans_t *tp;
				568	xfs_inode_t *ip;
				569
				570	/*
				571	* Put a dummy transaction in the log to tell
				572	* recovery that all others are OK.
				573	*/
				574	tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
				575	if ((error = xfs_trans_reserve(tp, 0,
				576	XFS_ICHANGE_LOG_RES(mp),
				577	0, 0, 0))) {
				578	xfs_trans_cancel(tp, 0);
				579	return error;
				580	}
				581
				582	ip = mp->m_rootip;
				583	xfs_ilock(ip, XFS_ILOCK_EXCL);
				584
				585	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
				586	xfs_trans_ihold(tp, ip);
				587	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				588	error = xfs_trans_commit(tp, 0);
				589	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				590	xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
				591	}
				592
				593	/*
				594	* When shutting down, we need to insure that the AIL is pushed
				595	* to disk or the filesystem can appear corrupt from the PROM.
				596	*/
				597	if ((flags & (SYNC_CLOSE\|SYNC_WAIT)) == (SYNC_CLOSE\|SYNC_WAIT)) {
				598	XFS_bflush(mp->m_ddev_targp);
				599	if (mp->m_rtdev_targp) {
				600	XFS_bflush(mp->m_rtdev_targp);
				601	}
				602	}
				603
				604	return XFS_ERROR(last_error);
				605	}