f2fs: handle EIO not to break fs consistency

There are two rules when EIO is occurred.
1. don't write any checkpoint data to preserve the previous checkpoint
2. don't lose the cached dentry/node/meta pages

So, at first, this patch adds set_page_dirty in f2fs_write_end_io's failure.
Then, writing checkpoint/dentry/node blocks is not allowed.

Note that, for the data pages, we can't just throw away by redirtying them.
Otherwise, kworker can fall into infinite loop to flush them.
(Ref. xfstests/019)

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 47709db..fcd1d9e 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -160,14 +160,11 @@
 		goto redirty_out;
 	if (wbc->for_reclaim)
 		goto redirty_out;
-
-	/* Should not write any meta pages, if any IO error was occurred */
 	if (unlikely(f2fs_cp_error(sbi)))
-		goto no_write;
+		goto redirty_out;
 
 	f2fs_wait_on_page_writeback(page, META);
 	write_meta_page(sbi, page);
-no_write:
 	dec_page_count(sbi, F2FS_DIRTY_META);
 	unlock_page(page);
 	return 0;
@@ -737,7 +734,7 @@
 /*
  * Freeze all the FS-operations for checkpoint.
  */
-static void block_operations(struct f2fs_sb_info *sbi)
+static int block_operations(struct f2fs_sb_info *sbi)
 {
 	struct writeback_control wbc = {
 		.sync_mode = WB_SYNC_ALL,
@@ -745,6 +742,7 @@
 		.for_reclaim = 0,
 	};
 	struct blk_plug plug;
+	int err = 0;
 
 	blk_start_plug(&plug);
 
@@ -754,6 +752,10 @@
 	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
 		f2fs_unlock_all(sbi);
 		sync_dirty_dir_inodes(sbi);
+		if (unlikely(f2fs_cp_error(sbi))) {
+			err = -EIO;
+			goto out;
+		}
 		goto retry_flush_dents;
 	}
 
@@ -767,9 +769,16 @@
 	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
 		up_write(&sbi->node_write);
 		sync_node_pages(sbi, 0, &wbc);
+		if (unlikely(f2fs_cp_error(sbi))) {
+			f2fs_unlock_all(sbi);
+			err = -EIO;
+			goto out;
+		}
 		goto retry_flush_nodes;
 	}
+out:
 	blk_finish_plug(&plug);
+	return err;
 }
 
 static void unblock_operations(struct f2fs_sb_info *sbi)
@@ -812,8 +821,11 @@
 	discard_next_dnode(sbi);
 
 	/* Flush all the NAT/SIT pages */
-	while (get_pages(sbi, F2FS_DIRTY_META))
+	while (get_pages(sbi, F2FS_DIRTY_META)) {
 		sync_meta_pages(sbi, META, LONG_MAX);
+		if (unlikely(f2fs_cp_error(sbi)))
+			return;
+	}
 
 	next_free_nid(sbi, &last_nid);
 
@@ -923,6 +935,9 @@
 	/* wait for previous submitted node/meta pages writeback */
 	wait_on_all_pages_writeback(sbi);
 
+	if (unlikely(f2fs_cp_error(sbi)))
+		return;
+
 	filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
 	filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
 
@@ -933,11 +948,13 @@
 	/* Here, we only have one bio having CP pack */
 	sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
 
-	if (!f2fs_cp_error(sbi)) {
-		clear_prefree_segments(sbi);
-		release_dirty_inode(sbi);
-		F2FS_RESET_SB_DIRT(sbi);
-	}
+	release_dirty_inode(sbi);
+
+	if (unlikely(f2fs_cp_error(sbi)))
+		return;
+
+	clear_prefree_segments(sbi);
+	F2FS_RESET_SB_DIRT(sbi);
 }
 
 /*
@@ -954,8 +971,10 @@
 
 	if (!sbi->s_dirty)
 		goto out;
-
-	block_operations(sbi);
+	if (unlikely(f2fs_cp_error(sbi)))
+		goto out;
+	if (block_operations(sbi))
+		goto out;
 
 	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
 
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 85a0b71..0cce322 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -53,7 +53,7 @@
 		struct page *page = bvec->bv_page;
 
 		if (unlikely(err)) {
-			SetPageError(page);
+			set_page_dirty(page);
 			set_bit(AS_EIO, &page->mapping->flags);
 			set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
 			sbi->sb->s_flags |= MS_RDONLY;
@@ -837,10 +837,19 @@
 
 	/* Dentry blocks are controlled by checkpoint */
 	if (S_ISDIR(inode->i_mode)) {
+		if (unlikely(f2fs_cp_error(sbi)))
+			goto redirty_out;
 		err = do_write_data_page(page, &fio);
 		goto done;
 	}
 
+	/* we should bypass data pages to proceed the kworkder jobs */
+	if (unlikely(f2fs_cp_error(sbi))) {
+		SetPageError(page);
+		unlock_page(page);
+		return 0;
+	}
+
 	if (!wbc->for_reclaim)
 		need_balance_fs = true;
 	else if (has_not_enough_free_secs(sbi, 0))
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 7bbb335..ebb9806 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1215,6 +1215,8 @@
 
 	if (unlikely(sbi->por_doing))
 		goto redirty_out;
+	if (unlikely(f2fs_cp_error(sbi)))
+		goto redirty_out;
 
 	f2fs_wait_on_page_writeback(page, NODE);
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index fffa118..a1053d6 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -435,7 +435,10 @@
 	if (sbi->s_dirty)
 		write_checkpoint(sbi, true);
 
-	/* normally superblock is clean, so we need to release this */
+	/*
+	 * normally superblock is clean, so we need to release this.
+	 * In addition, EIO will skip do checkpoint, we need this as well.
+	 */
 	release_dirty_inode(sbi);
 
 	iput(sbi->node_inode);