/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_btree.h"
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_inode.h"
#include "xfs_dinode.h"
#include "xfs_error.h"
#include "xfs_mru_cache.h"
#include "xfs_filestream.h"
#include "xfs_vnodeops.h"
#include "xfs_utils.h"
#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
#include "xfs_rw.h"
#include "xfs_quota.h"

#include <linux/kthread.h>
#include <linux/freezer.h>

/* must be called with pag_ici_lock held and releases it */
STATIC int
xfs_sync_inode_valid(
	struct xfs_inode	*ip,
	struct xfs_perag	*pag)
{
	struct inode		*inode = VFS_I(ip);

	/* nothing to sync during shutdown */
	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		read_unlock(&pag->pag_ici_lock);
		return EFSCORRUPTED;
	}

	/*
	 * If we can't get a reference on the inode, it must be in reclaim.
	 * Leave it for the reclaim code to flush. Also avoid inodes that
	 * haven't been fully initialised.
	 */
	if (!igrab(inode)) {
		read_unlock(&pag->pag_ici_lock);
		return ENOENT;
	}
	read_unlock(&pag->pag_ici_lock);

	if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) {
		IRELE(ip);
		return ENOENT;
	}

	return 0;
}

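/*
 * Write back the dirty pagecache data for an inode. If there are no dirty
 * pages we only wait for outstanding I/O when asked to. The iolock is taken
 * shared; with SYNC_TRYLOCK we give up rather than block on it, and
 * SYNC_WAIT turns the page flush from an async into a blocking write.
 */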
STATIC int
xfs_sync_inode_data(
	struct xfs_inode	*ip,
	int			flags)
{
	struct inode		*inode = VFS_I(ip);
	struct address_space	*mapping = inode->i_mapping;
	int			error = 0;

	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		goto out_wait;

	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
		if (flags & SYNC_TRYLOCK)
			goto out_wait;
		xfs_ilock(ip, XFS_IOLOCK_SHARED);
	}

	error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
				0 : XFS_B_ASYNC, FI_NONE);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

 out_wait:
	if (flags & SYNC_IOWAIT)
		xfs_ioend_wait(ip);
	return error;
}

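/*
 * Write back the dirty inode metadata. The inode is left alone if it is
 * already clean, or if we cannot get the flush lock without blocking and
 * SYNC_WAIT is not set; otherwise it is flushed either delwri or
 * synchronously depending on SYNC_WAIT.
 */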
STATIC int
xfs_sync_inode_attr(
	struct xfs_inode	*ip,
	int			flags)
{
	int			error = 0;

	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (xfs_inode_clean(ip))
		goto out_unlock;
	if (!xfs_iflock_nowait(ip)) {
		if (!(flags & SYNC_WAIT))
			goto out_unlock;
		xfs_iflock(ip);
	}

	if (xfs_inode_clean(ip)) {
		xfs_ifunlock(ip);
		goto out_unlock;
	}

	error = xfs_iflush(ip, (flags & SYNC_WAIT) ?
			   XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI);

 out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return error;
}

/*
 * Sync all the inodes in the given AG according to the
 * direction given by the flags.
 */
STATIC int
xfs_sync_inodes_ag(
	xfs_mount_t	*mp,
	int		ag,
	int		flags)
{
	xfs_perag_t	*pag = &mp->m_perag[ag];
	int		nr_found;
	uint32_t	first_index = 0;
	int		error = 0;
	int		last_error = 0;

	do {
		xfs_inode_t	*ip = NULL;

		/*
		 * use a gang lookup to find the next inode in the tree
		 * as the tree is sparse and a gang lookup walks to find
		 * the number of objects requested.
		 */
		read_lock(&pag->pag_ici_lock);
		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
				(void**)&ip, first_index, 1);

		if (!nr_found) {
			read_unlock(&pag->pag_ici_lock);
			break;
		}

		/*
		 * Update the index for the next lookup. Catch overflows
		 * into the next AG range which can occur if we have inodes
		 * in the last block of the AG and we are currently
		 * pointing to the last inode.
		 */
		first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
		if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
			read_unlock(&pag->pag_ici_lock);
			break;
		}

		error = xfs_sync_inode_valid(ip, pag);
		if (error) {
			if (error == EFSCORRUPTED)
				return 0;
			continue;
		}

		/*
		 * If we have to flush data or wait for I/O completion
		 * we need to hold the iolock.
		 */
		if (flags & SYNC_DELWRI)
			error = xfs_sync_inode_data(ip, flags);

		if (flags & SYNC_ATTR)
			error = xfs_sync_inode_attr(ip, flags);

		IRELE(ip);

		if (error)
			last_error = error;
		/*
		 * bail out if the filesystem is corrupted.
		 */
		if (error == EFSCORRUPTED)
			return XFS_ERROR(error);

	} while (nr_found);

	return last_error;
}

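/*
 * Sync the inodes in every initialised AG of the filesystem. Read-only
 * filesystems are left untouched, and SYNC_DELWRI syncs finish with a log
 * force (synchronous if SYNC_WAIT is also set).
 */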
int
xfs_sync_inodes(
	xfs_mount_t	*mp,
	int		flags)
{
	int		error;
	int		last_error;
	int		i;
	int		lflags = XFS_LOG_FORCE;

	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return 0;
	error = 0;
	last_error = 0;

	if (flags & SYNC_WAIT)
		lflags |= XFS_LOG_SYNC;

	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
		if (!mp->m_perag[i].pag_ici_init)
			continue;
		error = xfs_sync_inodes_ag(mp, i, flags);
		if (error)
			last_error = error;
		if (error == EFSCORRUPTED)
			break;
	}
	if (flags & SYNC_DELWRI)
		xfs_log_force(mp, 0, lflags);

	return XFS_ERROR(last_error);
}

STATIC int
xfs_commit_dummy_trans(
	struct xfs_mount	*mp,
	uint			log_flags)
{
	struct xfs_inode	*ip = mp->m_rootip;
	struct xfs_trans	*tp;
	int			error;

	/*
	 * Put a dummy transaction in the log to tell recovery
	 * that all others are OK.
	 */
	tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);

	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_ihold(tp, ip);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/* XXX(hch): ignoring the error here.. */
	error = xfs_trans_commit(tp, 0);

	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	xfs_log_force(mp, 0, log_flags);
	return 0;
}

int
xfs_sync_fsdata(
	struct xfs_mount	*mp,
	int			flags)
{
	struct xfs_buf		*bp;
	struct xfs_buf_log_item	*bip;
	int			error = 0;

	/*
	 * If this is xfssyncd() then only sync the superblock if we can
	 * lock it without sleeping and it is not pinned.
	 */
	if (flags & SYNC_BDFLUSH) {
		ASSERT(!(flags & SYNC_WAIT));

		bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
		if (!bp)
			goto out;

		bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *);
		if (!bip || !xfs_buf_item_dirty(bip) || XFS_BUF_ISPINNED(bp))
			goto out_brelse;
	} else {
		bp = xfs_getsb(mp, 0);

		/*
		 * If the buffer is pinned then push on the log so we won't
		 * get stuck waiting in the write for someone, maybe
		 * ourselves, to flush the log.
		 *
		 * Even though we just pushed the log above, we did not have
		 * the superblock buffer locked at that point so it can
		 * become pinned in between there and here.
		 */
		if (XFS_BUF_ISPINNED(bp))
			xfs_log_force(mp, 0, XFS_LOG_FORCE);
	}

	if (flags & SYNC_WAIT)
		XFS_BUF_UNASYNC(bp);
	else
		XFS_BUF_ASYNC(bp);

	return xfs_bwrite(mp, bp);

 out_brelse:
	xfs_buf_relse(bp);
 out:
	return error;
}

/*
 * When remounting a filesystem read-only or freezing the filesystem, we have
 * two phases to execute. This first phase is syncing the data before we
 * quiesce the filesystem, and the second is flushing all the inodes out after
 * we've waited for all the transactions created by the first phase to
 * complete. The second phase ensures that the inodes are written to their
 * location on disk rather than just existing in transactions in the log. This
 * means after a quiesce there is no log replay required to write the inodes to
 * disk (this is the main difference between a sync and a quiesce).
 */
/*
 * First stage of freeze - no writers will make progress now we are here,
 * so we flush delwri and delalloc buffers here, then wait for all I/O to
 * complete.  Data is frozen at that point. Metadata is not frozen,
 * transactions can still occur here so don't bother flushing the buftarg
 * because it'll just get dirty again.
 */
int
xfs_quiesce_data(
	struct xfs_mount	*mp)
{
	int error;

	/* push non-blocking */
	xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH);
	xfs_qm_sync(mp, SYNC_BDFLUSH);
	xfs_filestream_flush(mp);

	/* push and block */
	xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT);
	xfs_qm_sync(mp, SYNC_WAIT);

	/* write superblock and hoover up shutdown errors */
	error = xfs_sync_fsdata(mp, 0);

	/* flush data-only devices */
	if (mp->m_rtdev_targp)
		XFS_bflush(mp->m_rtdev_targp);

	return error;
}

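/*
 * Flush and reclaim all dirty inodes and push the dirty metadata buffers
 * out to disk. The loop below must complete two passes with nothing left
 * pinned before the unmount record can safely be written.
 */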
STATIC void
xfs_quiesce_fs(
	struct xfs_mount	*mp)
{
	int	count = 0, pincount;

	xfs_flush_buftarg(mp->m_ddev_targp, 0);
	xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);

	/*
	 * This loop must run at least twice.  The first instance of the loop
	 * will flush most meta data but that will generate more meta data
	 * (typically directory updates), which then must be flushed and
	 * logged before we can write the unmount record.
	 */
	do {
		xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT);
		pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
		if (!pincount) {
			delay(50);
			count++;
		}
	} while (count < 2);
}

/*
 * Second stage of a quiesce. The data is already synced, now we have to take
 * care of the metadata. New transactions are already blocked, so we need to
 * wait for any remaining transactions to drain out before proceeding.
 */
void
xfs_quiesce_attr(
	struct xfs_mount	*mp)
{
	int	error = 0;

	/* wait for all modifications to complete */
	while (atomic_read(&mp->m_active_trans) > 0)
		delay(100);

	/* flush inodes and push all remaining buffers out to disk */
	xfs_quiesce_fs(mp);

	/*
	 * Just warn here till VFS can correctly support
	 * read-only remount without racing.
	 */
	WARN_ON(atomic_read(&mp->m_active_trans) != 0);

	/* Push the superblock and write an unmount record */
	error = xfs_log_sbcount(mp, 1);
	if (error)
		xfs_fs_cmn_err(CE_WARN, mp,
				"xfs_attr_quiesce: failed to log sb changes. "
				"Frozen image may not be consistent.");
	xfs_log_unmount_write(mp);
	xfs_unmountfs_writesb(mp);
}

/*
 * Enqueue a work item to be picked up by the vfs xfssyncd thread.
 * Doing this has two advantages:
 * - It saves on stack space, which is tight in certain situations
 * - It can be used (with care) as a mechanism to avoid deadlocks.
 * Flushing while allocating in a full filesystem requires both.
 */
STATIC void
xfs_syncd_queue_work(
	struct xfs_mount *mp,
	void		*data,
	void		(*syncer)(struct xfs_mount *, void *),
	struct completion *completion)
{
	struct xfs_sync_work *work;

	work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
	INIT_LIST_HEAD(&work->w_list);
	work->w_syncer = syncer;
	work->w_data = data;
	work->w_mount = mp;
	work->w_completion = completion;
	spin_lock(&mp->m_sync_lock);
	list_add_tail(&work->w_list, &mp->m_sync_list);
	spin_unlock(&mp->m_sync_lock);
	wake_up_process(mp->m_sync_task);
}

/*
 * Flush delayed allocate data, attempting to free up reserved space
 * from existing allocations.  At this point a new allocation attempt
 * has failed with ENOSPC and we are in the process of scratching our
 * heads, looking about for more room...
 */
STATIC void
xfs_flush_inodes_work(
	struct xfs_mount *mp,
	void		*arg)
{
	struct inode	*inode = arg;
	xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK);
	xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT);
	iput(inode);
}

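/*
 * Queue xfs_flush_inodes_work() to the xfssyncd thread and wait for it to
 * complete, then force the log synchronously. Handing the flush off to the
 * sync thread keeps the stack usage of the ENOSPC caller down.
 */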
void
xfs_flush_inodes(
	xfs_inode_t	*ip)
{
	struct inode	*inode = VFS_I(ip);
	DECLARE_COMPLETION_ONSTACK(completion);

	igrab(inode);
	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
	wait_for_completion(&completion);
	xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
}

/*
 * Every sync period we need to unpin all items, reclaim inodes, sync
 * quota and write out the superblock. We might need to cover the log
 * to indicate it is idle.
 */
STATIC void
xfs_sync_worker(
	struct xfs_mount *mp,
	void		*unused)
{
	int		error;

	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
		xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
		/* dgc: errors ignored here */
		error = xfs_qm_sync(mp, SYNC_BDFLUSH);
		error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
		if (xfs_log_need_covered(mp))
			error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
	}
	mp->m_sync_seq++;
	wake_up(&mp->m_wait_single_sync_task);
}

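/*
 * The xfssyncd thread. Wakes up every xfs_syncd_centisecs, or earlier when
 * work is queued, runs the default xfs_sync_worker() item plus anything
 * added via xfs_syncd_queue_work(), completes and frees the queued items,
 * and exits when the mount is torn down and the work list is empty.
 */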
STATIC int
xfssyncd(
	void			*arg)
{
	struct xfs_mount	*mp = arg;
	long			timeleft;
	xfs_sync_work_t		*work, *n;
	LIST_HEAD		(tmp);

	set_freezable();
	timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
	for (;;) {
		timeleft = schedule_timeout_interruptible(timeleft);
		/* swsusp */
		try_to_freeze();
		if (kthread_should_stop() && list_empty(&mp->m_sync_list))
			break;

		spin_lock(&mp->m_sync_lock);
		/*
		 * We can get woken by laptop mode, to do a sync -
		 * that's the (only!) case where the list would be
		 * empty with time remaining.
		 */
		if (!timeleft || list_empty(&mp->m_sync_list)) {
			if (!timeleft)
				timeleft = xfs_syncd_centisecs *
							msecs_to_jiffies(10);
			INIT_LIST_HEAD(&mp->m_sync_work.w_list);
			list_add_tail(&mp->m_sync_work.w_list,
					&mp->m_sync_list);
		}
		list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
			list_move(&work->w_list, &tmp);
		spin_unlock(&mp->m_sync_lock);

		list_for_each_entry_safe(work, n, &tmp, w_list) {
			(*work->w_syncer)(mp, work->w_data);
			list_del(&work->w_list);
			if (work == &mp->m_sync_work)
				continue;
			if (work->w_completion)
				complete(work->w_completion);
			kmem_free(work);
		}
	}

	return 0;
}

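/* Set up the default work item and start the per-mount xfssyncd thread. */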
int
xfs_syncd_init(
	struct xfs_mount	*mp)
{
	mp->m_sync_work.w_syncer = xfs_sync_worker;
	mp->m_sync_work.w_mount = mp;
	mp->m_sync_work.w_completion = NULL;
	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
	if (IS_ERR(mp->m_sync_task))
		return -PTR_ERR(mp->m_sync_task);
	return 0;
}

void
xfs_syncd_stop(
	struct xfs_mount	*mp)
{
	kthread_stop(mp->m_sync_task);
}

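/*
 * Reclaim a single inode: mark it XFS_IRECLAIM so nobody else will touch
 * it, flush it if it is still dirty, and then free the in-core inode.
 * Returns 1 without doing anything if the inode is already being reclaimed
 * or is not marked reclaimable.
 */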
int
xfs_reclaim_inode(
	xfs_inode_t	*ip,
	int		locked,
	int		sync_mode)
{
	xfs_perag_t	*pag = xfs_get_perag(ip->i_mount, ip->i_ino);

	/* The hash lock here protects a thread in xfs_iget_core from
	 * racing with us on linking the inode back with a vnode.
	 * Once we have the XFS_IRECLAIM flag set it will not touch
	 * us.
	 */
	write_lock(&pag->pag_ici_lock);
	spin_lock(&ip->i_flags_lock);
	if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
	    !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
		spin_unlock(&ip->i_flags_lock);
		write_unlock(&pag->pag_ici_lock);
		if (locked) {
			xfs_ifunlock(ip);
			xfs_iunlock(ip, XFS_ILOCK_EXCL);
		}
		return 1;
	}
	__xfs_iflags_set(ip, XFS_IRECLAIM);
	spin_unlock(&ip->i_flags_lock);
	write_unlock(&pag->pag_ici_lock);
	xfs_put_perag(ip->i_mount, pag);

	/*
	 * If the inode is still dirty, then flush it out.  If the inode
	 * is not in the AIL, then it will be OK to flush it delwri as
	 * long as xfs_iflush() does not keep any references to the inode.
	 * We leave that decision up to xfs_iflush() since it has the
	 * knowledge of whether it's OK to simply do a delwri flush of
	 * the inode or whether we need to wait until the inode is
	 * pulled from the AIL.
	 * We get the flush lock regardless, though, just to make sure
	 * we don't free it while it is being flushed.
	 */
	if (!locked) {
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_iflock(ip);
	}

	/*
	 * In the case of a forced shutdown we rely on xfs_iflush() to
	 * wait for the inode to be unpinned before returning an error.
	 */
	if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
		/* synchronize with xfs_iflush_done */
		xfs_iflock(ip);
		xfs_ifunlock(ip);
	}

	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	xfs_ireclaim(ip);
	return 0;
}

/*
 * We set the inode flag atomically with the radix tree tag.
 * Once we get tag lookups on the radix tree, this inode flag
 * can go away.
 */
void
xfs_inode_set_reclaim_tag(
	xfs_inode_t	*ip)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_perag_t	*pag = xfs_get_perag(mp, ip->i_ino);

	read_lock(&pag->pag_ici_lock);
	spin_lock(&ip->i_flags_lock);
	radix_tree_tag_set(&pag->pag_ici_root,
			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
	spin_unlock(&ip->i_flags_lock);
	read_unlock(&pag->pag_ici_lock);
	xfs_put_perag(mp, pag);
}

void
__xfs_inode_clear_reclaim_tag(
	xfs_mount_t	*mp,
	xfs_perag_t	*pag,
	xfs_inode_t	*ip)
{
	radix_tree_tag_clear(&pag->pag_ici_root,
			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
}

void
xfs_inode_clear_reclaim_tag(
	xfs_inode_t	*ip)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_perag_t	*pag = xfs_get_perag(mp, ip->i_ino);

	read_lock(&pag->pag_ici_lock);
	spin_lock(&ip->i_flags_lock);
	__xfs_inode_clear_reclaim_tag(mp, pag, ip);
	spin_unlock(&ip->i_flags_lock);
	read_unlock(&pag->pag_ici_lock);
	xfs_put_perag(mp, pag);
}

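/*
 * Walk the reclaim-tagged inodes of a single AG via the radix tree and try
 * to reclaim each of them. If xfs_reclaim_inode() could not reclaim an
 * inode, pause briefly and restart the walk from the start of the AG.
 */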
STATIC void
xfs_reclaim_inodes_ag(
	xfs_mount_t	*mp,
	int		ag,
	int		mode)
{
	xfs_inode_t	*ip = NULL;
	xfs_perag_t	*pag = &mp->m_perag[ag];
	int		nr_found;
	uint32_t	first_index;
	int		skipped;

restart:
	first_index = 0;
	skipped = 0;
	do {
		/*
		 * use a gang lookup to find the next inode in the tree
		 * as the tree is sparse and a gang lookup walks to find
		 * the number of objects requested.
		 */
		read_lock(&pag->pag_ici_lock);
		nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
					(void**)&ip, first_index, 1,
					XFS_ICI_RECLAIM_TAG);

		if (!nr_found) {
			read_unlock(&pag->pag_ici_lock);
			break;
		}

		/*
		 * Update the index for the next lookup. Catch overflows
		 * into the next AG range which can occur if we have inodes
		 * in the last block of the AG and we are currently
		 * pointing to the last inode.
		 */
		first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
		if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
			read_unlock(&pag->pag_ici_lock);
			break;
		}

		/* ignore if already under reclaim */
		if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
			read_unlock(&pag->pag_ici_lock);
			continue;
		}

		read_unlock(&pag->pag_ici_lock);

		/*
		 * hmmm - this is an inode already in reclaim. Do
		 * we even bother catching it here?
		 */
		if (xfs_reclaim_inode(ip, 0, mode))
			skipped++;
	} while (nr_found);

	if (skipped) {
		delay(1);
		goto restart;
	}
	return;
}

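/* Reclaim the tagged inodes in every initialised AG of the filesystem. */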
int
xfs_reclaim_inodes(
	xfs_mount_t	*mp,
	int		mode)
{
	int		i;

	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
		if (!mp->m_perag[i].pag_ici_init)
			continue;
		xfs_reclaim_inodes_ag(mp, i, mode);
	}
	return 0;
}