f2fs: Update to proper 3.4 support
http://git.kernel.org/cgit/linux/kernel/git/jaegeuk/f2fs.git/log/?h=linux-3.4
Change-Id: Ica17871e847d4807c7ef24ea77dd4bff5451f5c7
Conflicts:
fs/f2fs/f2fs.h
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 9c71fc5..2385ef8 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -17,9 +17,6 @@
#include "xattr.h"
#include "acl.h"
-#define get_inode_mode(i) ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
- (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
-
static inline size_t f2fs_acl_size(int count)
{
if (count <= 4) {
@@ -82,7 +79,6 @@
case ACL_GROUP_OBJ:
case ACL_MASK:
case ACL_OTHER:
- acl->a_entries[i].e_id = ACL_UNDEFINED_ID;
entry = (struct f2fs_acl_entry *)((char *)entry +
sizeof(struct f2fs_acl_entry_short));
break;
@@ -285,7 +281,7 @@
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct posix_acl *acl;
int error;
- mode_t mode = get_inode_mode(inode);
+ umode_t mode = get_inode_mode(inode);
if (!test_opt(sbi, POSIX_ACL))
return 0;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index ecba8da..0b4710c 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -69,7 +69,6 @@
goto repeat;
}
out:
- mark_page_accessed(page);
return page;
}
@@ -137,13 +136,11 @@
if (!page)
continue;
if (PageUptodate(page)) {
- mark_page_accessed(page);
f2fs_put_page(page, 1);
continue;
}
f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
- mark_page_accessed(page);
f2fs_put_page(page, 0);
}
out:
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 3358ae1..feb6ca2 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -12,6 +12,7 @@
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
+#include <linux/aio.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
@@ -25,40 +26,33 @@
static void f2fs_read_end_io(struct bio *bio, int err)
{
- const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+ struct bio_vec *bvec;
+ int i;
- do {
+ __bio_for_each_segment(bvec, bio, i, 0) {
struct page *page = bvec->bv_page;
- if (--bvec >= bio->bi_io_vec)
- prefetchw(&bvec->bv_page->flags);
-
- if (unlikely(!uptodate)) {
+ if (!err) {
+ SetPageUptodate(page);
+ } else {
ClearPageUptodate(page);
SetPageError(page);
- } else {
- SetPageUptodate(page);
}
unlock_page(page);
- } while (bvec >= bio->bi_io_vec);
-
+ }
bio_put(bio);
}
static void f2fs_write_end_io(struct bio *bio, int err)
{
- const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
struct f2fs_sb_info *sbi = bio->bi_private;
+ struct bio_vec *bvec;
+ int i;
- do {
+ __bio_for_each_segment(bvec, bio, i, 0) {
struct page *page = bvec->bv_page;
- if (--bvec >= bio->bi_io_vec)
- prefetchw(&bvec->bv_page->flags);
-
- if (unlikely(!uptodate)) {
+ if (unlikely(err)) {
SetPageError(page);
set_bit(AS_EIO, &page->mapping->flags);
set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
@@ -66,7 +60,7 @@
}
end_page_writeback(page);
dec_page_count(sbi, F2FS_WRITEBACK);
- } while (bvec >= bio->bi_io_vec);
+ }
if (sbi->wait_io) {
complete(sbi->wait_io);
@@ -141,7 +135,7 @@
io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
- mutex_lock(&io->io_mutex);
+ down_write(&io->io_rwsem);
/* change META to META_FLUSH in the checkpoint procedure */
if (type >= META_FLUSH) {
@@ -149,7 +143,7 @@
io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
}
__submit_merged_bio(io);
- mutex_unlock(&io->io_mutex);
+ up_write(&io->io_rwsem);
}
/*
@@ -187,7 +181,7 @@
verify_block_addr(sbi, blk_addr);
- mutex_lock(&io->io_mutex);
+ down_write(&io->io_rwsem);
if (!is_read)
inc_page_count(sbi, F2FS_WRITEBACK);
@@ -211,7 +205,7 @@
io->last_block_in_bio = blk_addr;
- mutex_unlock(&io->io_mutex);
+ up_write(&io->io_rwsem);
trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
}
@@ -921,6 +915,16 @@
return 0;
}
+static void f2fs_write_failed(struct address_space *mapping, loff_t to)
+{
+ struct inode *inode = mapping->host;
+
+ if (to > inode->i_size) {
+ truncate_pagecache(inode, 0, inode->i_size);
+ truncate_blocks(inode, inode->i_size);
+ }
+}
+
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -938,11 +942,13 @@
repeat:
err = f2fs_convert_inline_data(inode, pos + len);
if (err)
- return err;
+ goto fail;
page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page)
- return -ENOMEM;
+ if (!page) {
+ err = -ENOMEM;
+ goto fail;
+ }
/* to avoid latency during memory pressure */
unlock_page(page);
@@ -956,10 +962,9 @@
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = f2fs_reserve_block(&dn, index);
f2fs_unlock_op(sbi);
-
if (err) {
f2fs_put_page(page, 0);
- return err;
+ goto fail;
}
inline_data:
lock_page(page);
@@ -989,19 +994,20 @@
err = f2fs_read_inline_data(inode, page);
if (err) {
page_cache_release(page);
- return err;
+ goto fail;
}
} else {
err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
READ_SYNC);
if (err)
- return err;
+ goto fail;
}
lock_page(page);
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
- return -EIO;
+ err = -EIO;
+ goto fail;
}
if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
@@ -1012,6 +1018,9 @@
SetPageUptodate(page);
clear_cold_data(page);
return 0;
+fail:
+ f2fs_write_failed(mapping, pos + len);
+ return err;
}
static int f2fs_write_end(struct file *file,
@@ -1023,7 +1032,6 @@
trace_f2fs_write_end(inode, pos, len, copied);
- SetPageUptodate(page);
set_page_dirty(page);
if (pos + copied > i_size_read(inode)) {
@@ -1048,17 +1056,22 @@
if (offset & blocksize_mask)
return -EINVAL;
- for (i = 0; i < nr_segs; i++)
- if (iov[i].iov_len & blocksize_mask)
- return -EINVAL;
+ for (i = 0; i < nr_segs; i++)
+ if (iov[i].iov_len & blocksize_mask)
+ return -EINVAL;
+
return 0;
}
static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
- const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+ const struct iovec *iov, loff_t offset,
+ unsigned long nr_segs)
{
struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ size_t count = iov_length(iov, nr_segs);
+ int err;
/* Let buffer I/O handle the inline data case. */
if (f2fs_has_inline_data(inode))
@@ -1070,8 +1083,11 @@
/* clear fsync mark to recover these blocks */
fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
- return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+ err = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
get_data_block);
+ if (err < 0 && (rw & WRITE))
+ f2fs_write_failed(mapping, offset + count);
+ return err;
}
static void f2fs_invalidate_data_page(struct page *page, unsigned long offset)
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index b52c12c..3f99266 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -345,21 +345,14 @@
f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
if (!f2fs_debugfs_root)
- goto bail;
+ return;
file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
NULL, &stat_fops);
- if (!file)
- goto free_debugfs_dir;
-
- return;
-
-free_debugfs_dir:
- debugfs_remove(f2fs_debugfs_root);
-
-bail:
- f2fs_debugfs_root = NULL;
- return;
+ if (!file) {
+ debugfs_remove(f2fs_debugfs_root);
+ f2fs_debugfs_root = NULL;
+ }
}
void f2fs_destroy_root_stats(void)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index b84d381..f74cb98 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -61,7 +61,7 @@
static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
{
- mode_t mode = inode->i_mode;
+ umode_t mode = inode->i_mode;
de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
}
@@ -77,8 +77,8 @@
return bidx;
}
-static bool early_match_name(const char *name, size_t namelen,
- f2fs_hash_t namehash, struct f2fs_dir_entry *de)
+static bool early_match_name(size_t namelen, f2fs_hash_t namehash,
+ struct f2fs_dir_entry *de)
{
if (le16_to_cpu(de->name_len) != namelen)
return false;
@@ -90,7 +90,7 @@
}
static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
- const char *name, size_t namelen, int *max_slots,
+ struct qstr *name, int *max_slots,
f2fs_hash_t namehash, struct page **res_page)
{
struct f2fs_dir_entry *de;
@@ -109,9 +109,10 @@
continue;
}
de = &dentry_blk->dentry[bit_pos];
- if (early_match_name(name, namelen, namehash, de)) {
+ if (early_match_name(name->len, namehash, de)) {
if (!memcmp(dentry_blk->filename[bit_pos],
- name, namelen)) {
+ name->name,
+ name->len)) {
*res_page = dentry_page;
goto found;
}
@@ -120,6 +121,13 @@
*max_slots = max_len;
max_len = 0;
}
+
+ /*
+ * For the most part, it should be a bug when name_len is zero.
+ * We stop here for figuring out where the bugs are occurred.
+ */
+ f2fs_bug_on(!de->name_len);
+
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
}
@@ -132,10 +140,10 @@
}
static struct f2fs_dir_entry *find_in_level(struct inode *dir,
- unsigned int level, const char *name, size_t namelen,
+ unsigned int level, struct qstr *name,
f2fs_hash_t namehash, struct page **res_page)
{
- int s = GET_DENTRY_SLOTS(namelen);
+ int s = GET_DENTRY_SLOTS(name->len);
unsigned int nbucket, nblock;
unsigned int bidx, end_block;
struct page *dentry_page;
@@ -160,8 +168,8 @@
continue;
}
- de = find_in_block(dentry_page, name, namelen,
- &max_slots, namehash, res_page);
+ de = find_in_block(dentry_page, name, &max_slots,
+ namehash, res_page);
if (de)
break;
@@ -187,8 +195,6 @@
struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
struct qstr *child, struct page **res_page)
{
- const char *name = child->name;
- size_t namelen = child->len;
unsigned long npages = dir_blocks(dir);
struct f2fs_dir_entry *de = NULL;
f2fs_hash_t name_hash;
@@ -200,12 +206,11 @@
*res_page = NULL;
- name_hash = f2fs_dentry_hash(name, namelen);
+ name_hash = f2fs_dentry_hash(child);
max_depth = F2FS_I(dir)->i_current_depth;
for (level = 0; level < max_depth; level++) {
- de = find_in_level(dir, level, name,
- namelen, name_hash, res_page);
+ de = find_in_level(dir, level, child, name_hash, res_page);
if (de)
break;
}
@@ -298,14 +303,13 @@
struct page *dentry_page;
struct f2fs_dentry_block *dentry_blk;
struct f2fs_dir_entry *de;
- void *kaddr;
dentry_page = get_new_data_page(inode, page, 0, true);
if (IS_ERR(dentry_page))
return PTR_ERR(dentry_page);
- kaddr = kmap_atomic(dentry_page);
- dentry_blk = (struct f2fs_dentry_block *)kaddr;
+
+ dentry_blk = kmap_atomic(dentry_page);
de = &dentry_blk->dentry[0];
de->name_len = cpu_to_le16(1);
@@ -323,7 +327,7 @@
test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
- kunmap_atomic(kaddr);
+ kunmap_atomic(dentry_blk);
set_page_dirty(dentry_page);
f2fs_put_page(dentry_page, 1);
@@ -333,11 +337,12 @@
static struct page *init_inode_metadata(struct inode *inode,
struct inode *dir, const struct qstr *name)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct page *page;
int err;
if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
- page = new_inode_page(inode, name);
+ page = new_inode_page(inode);
if (IS_ERR(page))
return page;
@@ -362,7 +367,8 @@
set_cold_node(inode, page);
}
- init_dent_inode(name, page);
+ if (name)
+ init_dent_inode(name, page);
/*
* This file should be checkpointed during fsync.
@@ -370,6 +376,12 @@
*/
if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
file_lost_pino(inode);
+ /*
+ * If link the tmpfile to alias through linkat path,
+ * we should remove this inode from orphan list.
+ */
+ if (inode->i_nlink == 0)
+ remove_orphan_inode(sbi, inode->i_ino);
inc_nlink(inode);
}
return page;
@@ -453,7 +465,7 @@
int err = 0;
int i;
- dentry_hash = f2fs_dentry_hash(name->name, name->len);
+ dentry_hash = f2fs_dentry_hash(name);
level = 0;
current_depth = F2FS_I(dir)->i_current_depth;
if (F2FS_I(dir)->chash == dentry_hash) {
@@ -529,6 +541,27 @@
return err;
}
+int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
+{
+ struct page *page;
+ int err = 0;
+
+ down_write(&F2FS_I(inode)->i_sem);
+ page = init_inode_metadata(inode, dir, NULL);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto fail;
+ }
+ /* we don't need to mark_inode_dirty now */
+ update_inode(inode, page);
+ f2fs_put_page(page, 1);
+
+ clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
+fail:
+ up_write(&F2FS_I(inode)->i_sem);
+ return err;
+}
+
/*
* It only removes the dentry from the dentry page,corresponding name
* entry in name page does not need to be touched during deletion.
@@ -541,14 +574,13 @@
struct address_space *mapping = page->mapping;
struct inode *dir = mapping->host;
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
- void *kaddr = page_address(page);
int i;
lock_page(page);
f2fs_wait_on_page_writeback(page, DATA);
- dentry_blk = (struct f2fs_dentry_block *)kaddr;
- bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry;
+ dentry_blk = page_address(page);
+ bit_pos = dentry - dentry_blk->dentry;
for (i = 0; i < slots; i++)
test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
@@ -603,7 +635,6 @@
unsigned long nblock = dir_blocks(dir);
for (bidx = 0; bidx < nblock; bidx++) {
- void *kaddr;
dentry_page = get_lock_data_page(dir, bidx);
if (IS_ERR(dentry_page)) {
if (PTR_ERR(dentry_page) == -ENOENT)
@@ -612,8 +643,8 @@
return false;
}
- kaddr = kmap_atomic(dentry_page);
- dentry_blk = (struct f2fs_dentry_block *)kaddr;
+
+ dentry_blk = kmap_atomic(dentry_page);
if (bidx == 0)
bit_pos = 2;
else
@@ -621,7 +652,7 @@
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
NR_DENTRY_IN_BLOCK,
bit_pos);
- kunmap_atomic(kaddr);
+ kunmap_atomic(dentry_blk);
f2fs_put_page(dentry_page, 1);
@@ -634,11 +665,11 @@
static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir)
{
unsigned long pos = file->f_pos;
- struct inode *inode = file->f_dentry->d_inode;
- unsigned long npages = dir_blocks(inode);
unsigned char *types = NULL;
unsigned int bit_pos = 0, start_bit_pos = 0;
int over = 0;
+ struct inode *inode = file_inode(file);
+ unsigned long npages = dir_blocks(inode);
struct f2fs_dentry_block *dentry_blk = NULL;
struct f2fs_dir_entry *de = NULL;
struct page *dentry_page = NULL;
@@ -682,7 +713,7 @@
le32_to_cpu(de->ino), d_type);
if (over) {
file->f_pos += bit_pos - start_bit_pos;
- goto success;
+ goto stop;
}
slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
bit_pos += slots;
@@ -693,7 +724,7 @@
f2fs_put_page(dentry_page, 1);
dentry_page = NULL;
}
-success:
+stop:
if (dentry_page && !IS_ERR(dentry_page)) {
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 14afbe7..c791143 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -22,7 +22,7 @@
#ifdef CONFIG_F2FS_CHECK_FS
#define f2fs_bug_on(condition) BUG_ON(condition)
-#define f2fs_down_write(x, y) down_write_nest_lock(x, y)
+#define f2fs_down_write(x, y) down_write(x)
#else
#define f2fs_bug_on(condition)
#define f2fs_down_write(x, y) down_write(x)
@@ -31,6 +31,7 @@
/*
* For mount options
*/
+#define F2FS_SUPER_MAGIC 0xF2F52010 /* F2FS Magic Number */
#define F2FS_MOUNT_BG_GC 0x00000001
#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002
#define F2FS_MOUNT_DISCARD 0x00000004
@@ -40,6 +41,7 @@
#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
#define F2FS_MOUNT_INLINE_XATTR 0x00000080
#define F2FS_MOUNT_INLINE_DATA 0x00000100
+#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -190,7 +192,7 @@
rwlock_t ext_lock; /* rwlock for consistency */
unsigned int fofs; /* start offset in a file */
u32 blk_addr; /* start block address of the extent */
- unsigned int len; /* lenth of the extent */
+ unsigned int len; /* length of the extent */
};
/*
@@ -255,6 +257,8 @@
unsigned int nat_cnt; /* the # of cached nat entries */
struct list_head nat_entries; /* cached nat entry list (clean) */
struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
+ struct list_head nat_entry_set; /* nat entry set list */
+ unsigned int dirty_nat_cnt; /* total num of nat entries in set */
/* free node ids management */
struct radix_tree_root free_nid_root;/* root of the free_nid cache */
@@ -320,6 +324,21 @@
NO_CHECK_TYPE
};
+struct flush_cmd {
+ struct flush_cmd *next;
+ struct completion wait;
+ int ret;
+};
+
+struct flush_cmd_control {
+ struct task_struct *f2fs_issue_flush; /* flush thread */
+ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
+ struct flush_cmd *issue_list; /* list for command issue */
+ struct flush_cmd *dispatch_list; /* list for command dispatch */
+ spinlock_t issue_lock; /* for issue list lock */
+ struct flush_cmd *issue_tail; /* list tail of issue list */
+};
+
struct f2fs_sm_info {
struct sit_info *sit_info; /* whole segment information */
struct free_segmap_info *free_info; /* free segment information */
@@ -345,6 +364,10 @@
unsigned int ipu_policy; /* in-place-update policy */
unsigned int min_ipu_util; /* in-place-update threshold */
+
+ /* for flush command control */
+ struct flush_cmd_control *cmd_control_info;
+
};
/*
@@ -395,7 +418,7 @@
struct bio *bio; /* bios to merge */
sector_t last_block_in_bio; /* last block number */
struct f2fs_io_info fio; /* store buffered io info. */
- struct mutex io_mutex; /* mutex for bio */
+ struct rw_semaphore io_rwsem; /* blocking op for bio */
};
struct f2fs_sb_info {
@@ -1058,6 +1081,27 @@
return sb->s_flags & MS_RDONLY;
}
+static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
+{
+ set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
+ sbi->sb->s_flags |= MS_RDONLY;
+}
+
+static inline struct inode *file_inode(struct file *f)
+{
+ return f->f_path.dentry->d_inode;
+}
+
+#define get_inode_mode(i) \
+ ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
+ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
+
+/* get offset of first page in next direct node */
+#define PGOFS_OF_NEXT_DNODE(pgofs, fi) \
+ ((pgofs < ADDRS_PER_INODE(fi)) ? ADDRS_PER_INODE(fi) : \
+ (pgofs - ADDRS_PER_INODE(fi) + ADDRS_PER_BLOCK) / \
+ ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi))
+
/*
* file.c
*/
@@ -1100,6 +1144,7 @@
int update_dent_inode(struct inode *, const struct qstr *);
int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
+int f2fs_do_tmpfile(struct inode *, struct inode *);
int f2fs_make_empty(struct inode *, struct inode *);
bool f2fs_empty_dir(struct inode *);
@@ -1119,7 +1164,7 @@
/*
* hash.c
*/
-f2fs_hash_t f2fs_dentry_hash(const char *, size_t);
+f2fs_hash_t f2fs_dentry_hash(const struct qstr *);
/*
* node.c
@@ -1137,7 +1182,7 @@
int truncate_xattr_node(struct inode *, struct page *);
int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
void remove_inode_page(struct inode *);
-struct page *new_inode_page(struct inode *, const struct qstr *);
+struct page *new_inode_page(struct inode *);
struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
void ra_node_page(struct f2fs_sb_info *, nid_t);
struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
@@ -1164,6 +1209,9 @@
*/
void f2fs_balance_fs(struct f2fs_sb_info *);
void f2fs_balance_fs_bg(struct f2fs_sb_info *);
+int f2fs_issue_flush(struct f2fs_sb_info *);
+int create_flush_cmd_control(struct f2fs_sb_info *);
+void destroy_flush_cmd_control(struct f2fs_sb_info *);
void invalidate_blocks(struct f2fs_sb_info *, block_t);
void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
void clear_prefree_segments(struct f2fs_sb_info *);
@@ -1256,7 +1304,6 @@
struct f2fs_stat_info {
struct list_head stat_list;
struct f2fs_sb_info *sbi;
- struct mutex stat_lock;
int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
int main_area_segs, main_area_sections, main_area_zones;
int hit_ext, total_ext;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 0e147b6c..0e88a75 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -19,6 +19,7 @@
#include <linux/compat.h>
#include <linux/uaccess.h>
#include <linux/mount.h>
+#include <linux/pagevec.h>
#include "f2fs.h"
#include "node.h"
@@ -31,22 +32,13 @@
struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(vma->vm_file);
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct dnode_of_data dn;
int err;
f2fs_balance_fs(sbi);
- /* F2FS backport: We replace in old kernels sb_start_pagefault(inode->i_sb) with vfs_check_frozen()
- * and remove the original sb_end_pagefault(inode->i_sb) after the out label
- *
- * The introduction of sb_{start,end}_pagefault() was made post-3.2 kernels by Jan Kara
- * and merged in commit a0e881b7c189fa2bd76c024dbff91e79511c971d.
- * Discussed at https://lkml.org/lkml/2012/3/5/278
- *
- * - Alex
- */
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
/* block allocation */
@@ -91,8 +83,8 @@
}
static const struct vm_operations_struct f2fs_file_vm_ops = {
- .fault = filemap_fault,
- .page_mkwrite = f2fs_vm_page_mkwrite,
+ .fault = filemap_fault,
+ .page_mkwrite = f2fs_vm_page_mkwrite,
};
static int get_parent_ino(struct inode *inode, nid_t *pino)
@@ -193,18 +185,161 @@
ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
if (ret)
goto out;
- ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
}
out:
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
return ret;
}
+static pgoff_t __get_first_dirty_index(struct address_space *mapping,
+ pgoff_t pgofs, int whence)
+{
+ struct pagevec pvec;
+ int nr_pages;
+
+ if (whence != SEEK_DATA)
+ return 0;
+
+ /* find first dirty page index */
+ pagevec_init(&pvec, 0);
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1);
+ pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX;
+ pagevec_release(&pvec);
+ return pgofs;
+}
+
+static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
+ int whence)
+{
+ switch (whence) {
+ case SEEK_DATA:
+ if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
+ (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
+ return true;
+ break;
+ case SEEK_HOLE:
+ if (blkaddr == NULL_ADDR)
+ return true;
+ break;
+ }
+ return false;
+}
+
+static inline int unsigned_offsets(struct file *file)
+{
+ return file->f_mode & FMODE_UNSIGNED_OFFSET;
+}
+
+static loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
+{
+ if (offset < 0 && !unsigned_offsets(file))
+ return -EINVAL;
+ if (offset > maxsize)
+ return -EINVAL;
+
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ file->f_version = 0;
+ }
+ return offset;
+}
+
+static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
+{
+ struct inode *inode = file->f_mapping->host;
+ loff_t maxbytes = inode->i_sb->s_maxbytes;
+ struct dnode_of_data dn;
+ pgoff_t pgofs, end_offset, dirty;
+ loff_t data_ofs = offset;
+ loff_t isize;
+ int err = 0;
+
+ mutex_lock(&inode->i_mutex);
+
+ isize = i_size_read(inode);
+ if (offset >= isize)
+ goto fail;
+
+ /* handle inline data case */
+ if (f2fs_has_inline_data(inode)) {
+ if (whence == SEEK_HOLE)
+ data_ofs = isize;
+ goto found;
+ }
+
+ pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT);
+
+ dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);
+
+ for (; data_ofs < isize; data_ofs = pgofs << PAGE_CACHE_SHIFT) {
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
+ if (err && err != -ENOENT) {
+ goto fail;
+ } else if (err == -ENOENT) {
+ /* direct node is not exist */
+ if (whence == SEEK_DATA) {
+ pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
+ F2FS_I(inode));
+ continue;
+ } else {
+ goto found;
+ }
+ }
+
+ end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+
+ /* find data/hole in dnode block */
+ for (; dn.ofs_in_node < end_offset;
+ dn.ofs_in_node++, pgofs++,
+ data_ofs = pgofs << PAGE_CACHE_SHIFT) {
+ block_t blkaddr;
+ blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+
+ if (__found_offset(blkaddr, dirty, pgofs, whence)) {
+ f2fs_put_dnode(&dn);
+ goto found;
+ }
+ }
+ f2fs_put_dnode(&dn);
+ }
+
+ if (whence == SEEK_DATA)
+ goto fail;
+found:
+ if (whence == SEEK_HOLE && data_ofs > isize)
+ data_ofs = isize;
+ mutex_unlock(&inode->i_mutex);
+ return vfs_setpos(file, data_ofs, maxbytes);
+fail:
+ mutex_unlock(&inode->i_mutex);
+ return -ENXIO;
+}
+
+static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct inode *inode = file->f_mapping->host;
+ loff_t maxbytes = inode->i_sb->s_maxbytes;
+
+ switch (whence) {
+ case SEEK_SET:
+ case SEEK_CUR:
+ case SEEK_END:
+ return generic_file_llseek_size(file, offset, whence,
+ maxbytes);
+ case SEEK_DATA:
+ case SEEK_HOLE:
+ return f2fs_seek_block(file, offset, whence);
+ }
+
+ return -EINVAL;
+}
+
static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
file_accessed(file);
vma->vm_ops = &f2fs_file_vm_ops;
- vma->vm_flags |= VM_CAN_NONLINEAR;
return 0;
}
@@ -260,13 +395,15 @@
return;
lock_page(page);
- if (unlikely(page->mapping != inode->i_mapping)) {
- f2fs_put_page(page, 1);
- return;
- }
+ if (unlikely(!PageUptodate(page) ||
+ page->mapping != inode->i_mapping))
+ goto out;
+
f2fs_wait_on_page_writeback(page, DATA);
zero_user(page, offset, PAGE_CACHE_SIZE - offset);
set_page_dirty(page);
+
+out:
f2fs_put_page(page, 1);
}
@@ -575,7 +712,7 @@
static long f2fs_fallocate(struct file *file, int mode,
loff_t offset, loff_t len)
{
- struct inode *inode = file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(file);
long ret;
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -614,7 +751,7 @@
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
- struct inode *inode = filp->f_dentry->d_inode;
+ struct inode *inode = file_inode(filp);
struct f2fs_inode_info *fi = F2FS_I(inode);
unsigned int flags;
int ret;
@@ -627,7 +764,7 @@
{
unsigned int oldflags;
- ret = mnt_want_write(filp->f_path.mnt);
+ ret = mnt_want_write_file(filp);
if (ret)
return ret;
@@ -664,7 +801,7 @@
inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
out:
- mnt_drop_write(filp->f_path.mnt);
+ mnt_drop_write_file(filp);
return ret;
}
default:
@@ -690,7 +827,7 @@
#endif
const struct file_operations f2fs_file_operations = {
- .llseek = generic_file_llseek,
+ .llseek = f2fs_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 115de74..47c51ed 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -45,6 +45,11 @@
if (kthread_should_stop())
break;
+ if (sbi->sb->s_frozen >= SB_FREEZE_WRITE) {
+ wait_ms = increase_sleep_time(gc_th, wait_ms);
+ continue;
+ }
+
/*
* [GC triggering condition]
* 0. GC is not conducted currently.
@@ -246,7 +251,7 @@
}
/*
- * This function is called from two pathes.
+ * This function is called from two paths.
* One is garbage collection and the other is SSR segment selection.
* When it is called during GC, it just gets a victim segment
* and it does not remove it from dirty seglist.
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 6eb8d26..948d17b 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -69,12 +69,14 @@
*buf++ = pad;
}
-f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len)
+f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
{
__u32 hash;
f2fs_hash_t f2fs_hash;
const char *p;
__u32 in[8], buf[4];
+ const char *name = name_info->name;
+ size_t len = name_info->len;
if ((len <= 2) && (name[0] == '.') &&
(name[1] == '.' || name[1] == '\0'))
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index eb48128..679ee78 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -12,7 +12,6 @@
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
-#include <linux/bitops.h>
#include "f2fs.h"
#include "node.h"
@@ -22,20 +21,20 @@
void f2fs_set_inode_flags(struct inode *inode)
{
unsigned int flags = F2FS_I(inode)->i_flags;
- unsigned int new_fl = 0;
+
+ inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE |
+ S_NOATIME | S_DIRSYNC);
if (flags & FS_SYNC_FL)
- new_fl |= S_SYNC;
+ inode->i_flags |= S_SYNC;
if (flags & FS_APPEND_FL)
- new_fl |= S_APPEND;
+ inode->i_flags |= S_APPEND;
if (flags & FS_IMMUTABLE_FL)
- new_fl |= S_IMMUTABLE;
+ inode->i_flags |= S_IMMUTABLE;
if (flags & FS_NOATIME_FL)
- new_fl |= S_NOATIME;
+ inode->i_flags |= S_NOATIME;
if (flags & FS_DIRSYNC_FL)
- new_fl |= S_DIRSYNC;
- set_mask_bits(&inode->i_flags,
- S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl);
+ inode->i_flags |= S_DIRSYNC;
}
static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -289,4 +288,5 @@
no_delete:
end_writeback(inode);
+ invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index ea38fc5..cb9abf9 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -13,6 +13,7 @@
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/ctype.h>
+#include <linux/dcache.h>
#include "f2fs.h"
#include "node.h"
@@ -22,14 +23,13 @@
static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
{
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
nid_t ino;
struct inode *inode;
bool nid_free = false;
int err;
- inode = new_inode(sb);
+ inode = new_inode(dir->i_sb);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -41,18 +41,9 @@
}
f2fs_unlock_op(sbi);
- inode->i_uid = current_fsuid();
-
- if (dir->i_mode & S_ISGID) {
- inode->i_gid = dir->i_gid;
- if (S_ISDIR(mode))
- mode |= S_ISGID;
- } else {
- inode->i_gid = current_fsgid();
- }
+ inode_init_owner(inode, dir, mode);
inode->i_ino = ino;
- inode->i_mode = mode;
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_generation = sbi->s_next_generation++;
@@ -109,10 +100,9 @@
}
static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- struct nameidata *nd)
+ struct nameidata *nd)
{
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct inode *inode;
nid_t ino = 0;
int err;
@@ -155,8 +145,7 @@
struct dentry *dentry)
{
struct inode *inode = old_dentry->d_inode;
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
int err;
f2fs_balance_fs(sbi);
@@ -181,7 +170,7 @@
struct dentry *f2fs_get_parent(struct dentry *child)
{
- struct qstr dotdot = { .name = "..", .len = 2 };
+ struct qstr dotdot = {.len = 2, .name = ".."};
unsigned long ino = f2fs_inode_by_name(child->d_inode, &dotdot);
if (!ino)
return ERR_PTR(-ENOENT);
@@ -189,7 +178,7 @@
}
static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
- struct nameidata *nd)
+ struct nameidata *nd)
{
struct inode *inode = NULL;
struct f2fs_dir_entry *de;
@@ -216,8 +205,7 @@
static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
{
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct inode *inode = dentry->d_inode;
struct f2fs_dir_entry *de;
struct page *page;
@@ -251,8 +239,7 @@
static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
const char *symname)
{
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct inode *inode;
size_t symlen = strlen(symname) + 1;
int err;
@@ -339,8 +326,7 @@
static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
umode_t mode, dev_t rdev)
{
- struct super_block *sb = dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct inode *inode;
int err = 0;
@@ -378,8 +364,7 @@
static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
- struct super_block *sb = old_dir->i_sb;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(old_dir->i_sb);
struct inode *old_inode = old_dentry->d_inode;
struct inode *new_inode = new_dentry->d_inode;
struct page *old_dir_page;
@@ -402,8 +387,6 @@
goto out_old;
}
- f2fs_lock_op(sbi);
-
if (new_inode) {
err = -ENOTEMPTY;
@@ -416,6 +399,8 @@
if (!new_entry)
goto out_dir;
+ f2fs_lock_op(sbi);
+
err = acquire_orphan_inode(sbi);
if (err)
goto put_out_dir;
@@ -444,9 +429,13 @@
update_inode_page(old_inode);
update_inode_page(new_inode);
} else {
+ f2fs_lock_op(sbi);
+
err = f2fs_add_link(new_dentry, old_inode);
- if (err)
+ if (err) {
+ f2fs_unlock_op(sbi);
goto out_dir;
+ }
if (old_dir_entry) {
inc_nlink(new_dir);
@@ -481,6 +470,7 @@
return 0;
put_out_dir:
+ f2fs_unlock_op(sbi);
kunmap(new_page);
f2fs_put_page(new_page, 0);
out_dir:
@@ -488,7 +478,6 @@
kunmap(old_dir_page);
f2fs_put_page(old_dir_page, 0);
}
- f2fs_unlock_op(sbi);
out_old:
kunmap(old_page);
f2fs_put_page(old_page, 0);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index a494e72..fa1e0c9 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -25,6 +25,7 @@
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
+static struct kmem_cache *nat_entry_set_slab;
bool available_free_memory(struct f2fs_sb_info *sbi, int type)
{
@@ -90,12 +91,8 @@
/* get current nat block page with lock */
src_page = get_meta_page(sbi, src_off);
-
- /* Dirty src_page means that it is already the new target NAT page. */
- if (PageDirty(src_page))
- return src_page;
-
dst_page = grab_meta_page(sbi, dst_off);
+ f2fs_bug_on(PageDirty(src_page));
src_addr = page_address(src_page);
dst_addr = page_address(dst_page);
@@ -845,7 +842,7 @@
truncate_node(&dn);
}
-struct page *new_inode_page(struct inode *inode, const struct qstr *name)
+struct page *new_inode_page(struct inode *inode)
{
struct dnode_of_data dn;
@@ -984,7 +981,6 @@
goto repeat;
}
got_it:
- mark_page_accessed(page);
return page;
}
@@ -1039,7 +1035,6 @@
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
- mark_page_accessed(page);
return page;
}
@@ -1745,7 +1740,90 @@
return err;
}
-static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
+static struct nat_entry_set *grab_nat_entry_set(void)
+{
+ struct nat_entry_set *nes =
+ f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
+
+ nes->entry_cnt = 0;
+ INIT_LIST_HEAD(&nes->set_list);
+ INIT_LIST_HEAD(&nes->entry_list);
+ return nes;
+}
+
+static void release_nat_entry_set(struct nat_entry_set *nes,
+ struct f2fs_nm_info *nm_i)
+{
+ f2fs_bug_on(!list_empty(&nes->entry_list));
+
+ nm_i->dirty_nat_cnt -= nes->entry_cnt;
+ list_del(&nes->set_list);
+ kmem_cache_free(nat_entry_set_slab, nes);
+}
+
+static void adjust_nat_entry_set(struct nat_entry_set *nes,
+ struct list_head *head)
+{
+ struct nat_entry_set *next = nes;
+
+ if (list_is_last(&nes->set_list, head))
+ return;
+
+ list_for_each_entry_continue(next, head, set_list)
+ if (nes->entry_cnt <= next->entry_cnt)
+ break;
+
+ list_move_tail(&nes->set_list, &next->set_list);
+}
+
+static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
+{
+ struct nat_entry_set *nes;
+ nid_t start_nid = START_NID(ne->ni.nid);
+
+ list_for_each_entry(nes, head, set_list) {
+ if (nes->start_nid == start_nid) {
+ list_move_tail(&ne->list, &nes->entry_list);
+ nes->entry_cnt++;
+ adjust_nat_entry_set(nes, head);
+ return;
+ }
+ }
+
+ nes = grab_nat_entry_set();
+
+ nes->start_nid = start_nid;
+ list_move_tail(&ne->list, &nes->entry_list);
+ nes->entry_cnt++;
+ list_add(&nes->set_list, head);
+}
+
+static void merge_nats_in_set(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct list_head *dirty_list = &nm_i->dirty_nat_entries;
+ struct list_head *set_list = &nm_i->nat_entry_set;
+ struct nat_entry *ne, *tmp;
+
+ write_lock(&nm_i->nat_tree_lock);
+ list_for_each_entry_safe(ne, tmp, dirty_list, list) {
+ if (nat_get_blkaddr(ne) == NEW_ADDR)
+ continue;
+ add_nat_entry(ne, set_list);
+ nm_i->dirty_nat_cnt++;
+ }
+ write_unlock(&nm_i->nat_tree_lock);
+}
+
+static bool __has_cursum_space(struct f2fs_summary_block *sum, int size)
+{
+ if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES)
+ return true;
+ else
+ return false;
+}
+
+static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1753,12 +1831,6 @@
int i;
mutex_lock(&curseg->curseg_mutex);
-
- if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
- mutex_unlock(&curseg->curseg_mutex);
- return false;
- }
-
for (i = 0; i < nats_in_cursum(sum); i++) {
struct nat_entry *ne;
struct f2fs_nat_entry raw_ne;
@@ -1768,23 +1840,21 @@
retry:
write_lock(&nm_i->nat_tree_lock);
ne = __lookup_nat_cache(nm_i, nid);
- if (ne) {
- __set_nat_cache_dirty(nm_i, ne);
- write_unlock(&nm_i->nat_tree_lock);
- continue;
- }
+ if (ne)
+ goto found;
+
ne = grab_nat_entry(nm_i, nid);
if (!ne) {
write_unlock(&nm_i->nat_tree_lock);
goto retry;
}
node_info_from_raw_nat(&ne->ni, &raw_ne);
+found:
__set_nat_cache_dirty(nm_i, ne);
write_unlock(&nm_i->nat_tree_lock);
}
update_nats_in_cursum(sum, -i);
mutex_unlock(&curseg->curseg_mutex);
- return true;
}
/*
@@ -1795,80 +1865,91 @@
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
- struct nat_entry *ne, *cur;
- struct page *page = NULL;
- struct f2fs_nat_block *nat_blk = NULL;
- nid_t start_nid = 0, end_nid = 0;
- bool flushed;
+ struct nat_entry_set *nes, *tmp;
+ struct list_head *head = &nm_i->nat_entry_set;
+ bool to_journal = true;
- flushed = flush_nats_in_journal(sbi);
+ /* merge nat entries of dirty list to nat entry set temporarily */
+ merge_nats_in_set(sbi);
- if (!flushed)
- mutex_lock(&curseg->curseg_mutex);
+ /*
+ * if there are no enough space in journal to store dirty nat
+ * entries, remove all entries from journal and merge them
+ * into nat entry set.
+ */
+ if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) {
+ remove_nats_in_journal(sbi);
- /* 1) flush dirty nat caches */
- list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
- nid_t nid;
- struct f2fs_nat_entry raw_ne;
- int offset = -1;
+ /*
+ * merge nat entries of dirty list to nat entry set temporarily
+ */
+ merge_nats_in_set(sbi);
+ }
- if (nat_get_blkaddr(ne) == NEW_ADDR)
- continue;
+ if (!nm_i->dirty_nat_cnt)
+ return;
- nid = nat_get_nid(ne);
+ /*
+ * there are two steps to flush nat entries:
+ * #1, flush nat entries to journal in current hot data summary block.
+ * #2, flush nat entries to nat page.
+ */
+ list_for_each_entry_safe(nes, tmp, head, set_list) {
+ struct f2fs_nat_block *nat_blk;
+ struct nat_entry *ne, *cur;
+ struct page *page;
+ nid_t start_nid = nes->start_nid;
- if (flushed)
- goto to_nat_page;
+ if (to_journal && !__has_cursum_space(sum, nes->entry_cnt))
+ to_journal = false;
- /* if there is room for nat enries in curseg->sumpage */
- offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
- if (offset >= 0) {
- raw_ne = nat_in_journal(sum, offset);
- goto flush_now;
- }
-to_nat_page:
- if (!page || (start_nid > nid || nid > end_nid)) {
- if (page) {
- f2fs_put_page(page, 1);
- page = NULL;
- }
- start_nid = START_NID(nid);
- end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;
-
- /*
- * get nat block with dirty flag, increased reference
- * count, mapped and lock
- */
+ if (to_journal) {
+ mutex_lock(&curseg->curseg_mutex);
+ } else {
page = get_next_nat_page(sbi, start_nid);
nat_blk = page_address(page);
+ f2fs_bug_on(!nat_blk);
}
- f2fs_bug_on(!nat_blk);
- raw_ne = nat_blk->entries[nid - start_nid];
-flush_now:
- raw_nat_from_node_info(&raw_ne, &ne->ni);
+ /* flush dirty nats in nat entry set */
+ list_for_each_entry_safe(ne, cur, &nes->entry_list, list) {
+ struct f2fs_nat_entry *raw_ne;
+ nid_t nid = nat_get_nid(ne);
+ int offset;
- if (offset < 0) {
- nat_blk->entries[nid - start_nid] = raw_ne;
- } else {
- nat_in_journal(sum, offset) = raw_ne;
- nid_in_journal(sum, offset) = cpu_to_le32(nid);
- }
+ if (to_journal) {
+ offset = lookup_journal_in_cursum(sum,
+ NAT_JOURNAL, nid, 1);
+ f2fs_bug_on(offset < 0);
+ raw_ne = &nat_in_journal(sum, offset);
+ nid_in_journal(sum, offset) = cpu_to_le32(nid);
+ } else {
+ raw_ne = &nat_blk->entries[nid - start_nid];
+ }
+ raw_nat_from_node_info(raw_ne, &ne->ni);
- if (nat_get_blkaddr(ne) == NULL_ADDR &&
+ if (nat_get_blkaddr(ne) == NULL_ADDR &&
add_free_nid(sbi, nid, false) <= 0) {
- write_lock(&nm_i->nat_tree_lock);
- __del_from_nat_cache(nm_i, ne);
- write_unlock(&nm_i->nat_tree_lock);
- } else {
- write_lock(&nm_i->nat_tree_lock);
- __clear_nat_cache_dirty(nm_i, ne);
- write_unlock(&nm_i->nat_tree_lock);
+ write_lock(&nm_i->nat_tree_lock);
+ __del_from_nat_cache(nm_i, ne);
+ write_unlock(&nm_i->nat_tree_lock);
+ } else {
+ write_lock(&nm_i->nat_tree_lock);
+ __clear_nat_cache_dirty(nm_i, ne);
+ write_unlock(&nm_i->nat_tree_lock);
+ }
}
+
+ if (to_journal)
+ mutex_unlock(&curseg->curseg_mutex);
+ else
+ f2fs_put_page(page, 1);
+
+ release_nat_entry_set(nes, nm_i);
}
- if (!flushed)
- mutex_unlock(&curseg->curseg_mutex);
- f2fs_put_page(page, 1);
+
+ f2fs_bug_on(!list_empty(head));
+ f2fs_bug_on(nm_i->dirty_nat_cnt);
}
static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1897,6 +1978,7 @@
INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->nat_entries);
INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
+ INIT_LIST_HEAD(&nm_i->nat_entry_set);
mutex_init(&nm_i->build_lock);
spin_lock_init(&nm_i->free_nid_list_lock);
@@ -1977,19 +2059,30 @@
nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
sizeof(struct nat_entry));
if (!nat_entry_slab)
- return -ENOMEM;
+ goto fail;
free_nid_slab = f2fs_kmem_cache_create("free_nid",
sizeof(struct free_nid));
- if (!free_nid_slab) {
- kmem_cache_destroy(nat_entry_slab);
- return -ENOMEM;
- }
+ if (!free_nid_slab)
+ goto destory_nat_entry;
+
+ nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
+ sizeof(struct nat_entry_set));
+ if (!nat_entry_set_slab)
+ goto destory_free_nid;
return 0;
+
+destory_free_nid:
+ kmem_cache_destroy(free_nid_slab);
+destory_nat_entry:
+ kmem_cache_destroy(nat_entry_slab);
+fail:
+ return -ENOMEM;
}
void destroy_node_manager_caches(void)
{
+ kmem_cache_destroy(nat_entry_set_slab);
kmem_cache_destroy(free_nid_slab);
kmem_cache_destroy(nat_entry_slab);
}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 7281112..8a116a4 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -89,6 +89,13 @@
DIRTY_DENTS /* indicates dirty dentry pages */
};
+struct nat_entry_set {
+ struct list_head set_list; /* link with all nat sets */
+ struct list_head entry_list; /* link with dirty nat entries */
+ nid_t start_nid; /* start nid of nats in set */
+ unsigned int entry_cnt; /* the # of nat entries in set */
+};
+
/*
* For free nid mangement
*/
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 3110c29..3461bc1 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -13,6 +13,7 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/prefetch.h>
+#include <linux/kthread.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
@@ -195,6 +196,132 @@
f2fs_sync_fs(sbi->sb, true);
}
+struct __submit_bio_ret {
+ struct completion event;
+ int error;
+};
+
+static void __submit_bio_wait_endio(struct bio *bio, int error)
+{
+ struct __submit_bio_ret *ret = bio->bi_private;
+
+ ret->error = error;
+ complete(&ret->event);
+}
+
+static int __submit_bio_wait(int rw, struct bio *bio)
+{
+ struct __submit_bio_ret ret;
+
+ rw |= REQ_SYNC;
+ init_completion(&ret.event);
+ bio->bi_private = &ret;
+ bio->bi_end_io = __submit_bio_wait_endio;
+ submit_bio(rw, bio);
+ wait_for_completion(&ret.event);
+
+ return ret.error;
+}
+
+static int issue_flush_thread(void *data)
+{
+ struct f2fs_sb_info *sbi = data;
+ struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+ wait_queue_head_t *q = &fcc->flush_wait_queue;
+repeat:
+ if (kthread_should_stop())
+ return 0;
+
+ spin_lock(&fcc->issue_lock);
+ if (fcc->issue_list) {
+ fcc->dispatch_list = fcc->issue_list;
+ fcc->issue_list = fcc->issue_tail = NULL;
+ }
+ spin_unlock(&fcc->issue_lock);
+
+ if (fcc->dispatch_list) {
+ struct bio *bio = bio_alloc(GFP_NOIO, 0);
+ struct flush_cmd *cmd, *next;
+ int ret;
+
+ bio->bi_bdev = sbi->sb->s_bdev;
+ ret = __submit_bio_wait(WRITE_FLUSH, bio);
+
+ for (cmd = fcc->dispatch_list; cmd; cmd = next) {
+ cmd->ret = ret;
+ next = cmd->next;
+ complete(&cmd->wait);
+ }
+ bio_put(bio);
+ fcc->dispatch_list = NULL;
+ }
+
+ wait_event_interruptible(*q,
+ kthread_should_stop() || fcc->issue_list);
+ goto repeat;
+}
+
+int f2fs_issue_flush(struct f2fs_sb_info *sbi)
+{
+ struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+ struct flush_cmd cmd;
+
+ if (!test_opt(sbi, FLUSH_MERGE))
+ return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
+
+ init_completion(&cmd.wait);
+ cmd.next = NULL;
+
+ spin_lock(&fcc->issue_lock);
+ if (fcc->issue_list)
+ fcc->issue_tail->next = &cmd;
+ else
+ fcc->issue_list = &cmd;
+ fcc->issue_tail = &cmd;
+ spin_unlock(&fcc->issue_lock);
+
+ if (!fcc->dispatch_list)
+ wake_up(&fcc->flush_wait_queue);
+
+ wait_for_completion(&cmd.wait);
+
+ return cmd.ret;
+}
+
+int create_flush_cmd_control(struct f2fs_sb_info *sbi)
+{
+ dev_t dev = sbi->sb->s_bdev->bd_dev;
+ struct flush_cmd_control *fcc;
+ int err = 0;
+
+ fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
+ if (!fcc)
+ return -ENOMEM;
+ spin_lock_init(&fcc->issue_lock);
+ init_waitqueue_head(&fcc->flush_wait_queue);
+ SM_I(sbi)->cmd_control_info = fcc;
+ fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
+ "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
+ if (IS_ERR(fcc->f2fs_issue_flush)) {
+ err = PTR_ERR(fcc->f2fs_issue_flush);
+ kfree(fcc);
+ SM_I(sbi)->cmd_control_info = NULL;
+ return err;
+ }
+
+ return err;
+}
+
+void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
+{
+ struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+
+ if (fcc && fcc->f2fs_issue_flush)
+ kthread_stop(fcc->f2fs_issue_flush);
+ kfree(fcc);
+ SM_I(sbi)->cmd_control_info = NULL;
+}
+
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
enum dirty_type dirty_type)
{
@@ -1057,12 +1184,37 @@
mutex_unlock(&curseg->curseg_mutex);
}
+static inline bool is_merged_page(struct f2fs_sb_info *sbi,
+ struct page *page, enum page_type type)
+{
+ enum page_type btype = PAGE_TYPE_OF_BIO(type);
+ struct f2fs_bio_info *io = &sbi->write_io[btype];
+ struct bio_vec *bvec;
+ int i;
+
+ down_read(&io->io_rwsem);
+ if (!io->bio)
+ goto out;
+
+ __bio_for_each_segment(bvec, io->bio, i, 0) {
+ if (page == bvec->bv_page) {
+ up_read(&io->io_rwsem);
+ return true;
+ }
+ }
+
+out:
+ up_read(&io->io_rwsem);
+ return false;
+}
+
void f2fs_wait_on_page_writeback(struct page *page,
enum page_type type)
{
struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
if (PageWriteback(page)) {
- f2fs_submit_merged_bio(sbi, type, WRITE);
+ if (is_merged_page(sbi, page, type))
+ f2fs_submit_merged_bio(sbi, type, WRITE);
wait_on_page_writeback(page);
}
}
@@ -1577,7 +1729,7 @@
struct curseg_info *array;
int i;
- array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL);
+ array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
if (!array)
return -ENOMEM;
@@ -1776,6 +1928,12 @@
sm_info->nr_discards = 0;
sm_info->max_discards = 0;
+ if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
+ err = create_flush_cmd_control(sbi);
+ if (err)
+ return err;
+ }
+
err = build_sit_info(sbi);
if (err)
return err;
@@ -1882,8 +2040,10 @@
void destroy_segment_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_sm_info *sm_info = SM_I(sbi);
+
if (!sm_info)
return;
+ destroy_flush_cmd_control(sbi);
destroy_dirty_segmap(sbi);
destroy_curseg(sbi);
destroy_free_segmap(sbi);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ada9e02..96c1af2 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -51,6 +51,7 @@
Opt_disable_ext_identify,
Opt_inline_xattr,
Opt_inline_data,
+ Opt_flush_merge,
Opt_err,
};
@@ -67,6 +68,7 @@
{Opt_disable_ext_identify, "disable_ext_identify"},
{Opt_inline_xattr, "inline_xattr"},
{Opt_inline_data, "inline_data"},
+ {Opt_flush_merge, "flush_merge"},
{Opt_err, NULL},
};
@@ -334,6 +336,9 @@
case Opt_inline_data:
set_opt(sbi, INLINE_DATA);
break;
+ case Opt_flush_merge:
+ set_opt(sbi, FLUSH_MERGE);
+ break;
default:
f2fs_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" or missing value",
@@ -354,7 +359,7 @@
init_once((void *) fi);
- /* Initilize f2fs-specific inode info */
+ /* Initialize f2fs-specific inode info */
fi->vfs_inode.i_version = 1;
atomic_set(&fi->dirty_dents, 0);
fi->i_current_depth = 1;
@@ -461,6 +466,22 @@
return 0;
}
+static int f2fs_freeze(struct super_block *sb)
+{
+ int err;
+
+ if (f2fs_readonly(sb))
+ return 0;
+
+ err = f2fs_sync_fs(sb, 1);
+ return err;
+}
+
+static int f2fs_unfreeze(struct super_block *sb)
+{
+ return 0;
+}
+
static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
@@ -493,7 +514,7 @@
{
struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
- if (!(root->d_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC))
+ if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC))
seq_printf(seq, ",background_gc=%s", "on");
else
seq_printf(seq, ",background_gc=%s", "off");
@@ -521,6 +542,8 @@
seq_puts(seq, ",disable_ext_identify");
if (test_opt(sbi, INLINE_DATA))
seq_puts(seq, ",inline_data");
+ if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
+ seq_puts(seq, ",flush_merge");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0;
@@ -571,6 +594,10 @@
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct f2fs_mount_info org_mount_opt;
int err, active_logs;
+ bool need_restart_gc = false;
+ bool need_stop_gc = false;
+
+ sync_filesystem(sb);
/*
* Save the old mount options in case we
@@ -586,9 +613,9 @@
/*
* Previous and new state of filesystem is RO,
- * so no point in checking GC conditions.
+ * so skip checking GC and FLUSH_MERGE conditions.
*/
- if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY))
+ if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
goto skip;
/*
@@ -600,18 +627,39 @@
if (sbi->gc_thread) {
stop_gc_thread(sbi);
f2fs_sync_fs(sb, 1);
+ need_restart_gc = true;
}
} else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) {
err = start_gc_thread(sbi);
if (err)
goto restore_opts;
+ need_stop_gc = true;
+ }
+
+ /*
+ * We stop issue flush thread if FS is mounted as RO
+ * or if flush_merge is not passed in mount option.
+ */
+ if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
+ destroy_flush_cmd_control(sbi);
+ } else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) {
+ err = create_flush_cmd_control(sbi);
+ if (err)
+ goto restore_gc;
}
skip:
/* Update the POSIXACL Flag */
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
return 0;
-
+restore_gc:
+ if (need_restart_gc) {
+ if (start_gc_thread(sbi))
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "background gc thread is stop");
+ } else if (need_stop_gc) {
+ stop_gc_thread(sbi);
+ }
restore_opts:
sbi->mount_opt = org_mount_opt;
sbi->active_logs = active_logs;
@@ -628,6 +676,8 @@
.evict_inode = f2fs_evict_inode,
.put_super = f2fs_put_super,
.sync_fs = f2fs_sync_fs,
+ .freeze_fs = f2fs_freeze,
+ .unfreeze_fs = f2fs_unfreeze,
.statfs = f2fs_statfs,
.remount_fs = f2fs_remount,
};
@@ -900,11 +950,11 @@
sbi->por_doing = false;
spin_lock_init(&sbi->stat_lock);
- mutex_init(&sbi->read_io.io_mutex);
+ init_rwsem(&sbi->read_io.io_rwsem);
sbi->read_io.sbi = sbi;
sbi->read_io.bio = NULL;
for (i = 0; i < NR_PAGE_TYPE; i++) {
- mutex_init(&sbi->write_io[i].io_mutex);
+ init_rwsem(&sbi->write_io[i].io_rwsem);
sbi->write_io[i].sbi = sbi;
sbi->write_io[i].bio = NULL;
}
@@ -1031,7 +1081,7 @@
* If filesystem is not mounted as read-only then
* do start the gc_thread.
*/
- if (!(sb->s_flags & MS_RDONLY)) {
+ if (!f2fs_readonly(sb)) {
/* After POR, we can run background GC thread.*/
err = start_gc_thread(sbi);
if (err)