f2fs: resync with upstream 3.4 branch (forced update)
Change-Id: I0307382d983e8ef806cfdfe57dcde500fd060995
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 0b4710c..6aeed5b 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -22,7 +22,7 @@
#include "segment.h"
#include <trace/events/f2fs.h>
-static struct kmem_cache *orphan_entry_slab;
+static struct kmem_cache *ino_entry_slab;
static struct kmem_cache *inode_entry_slab;
/*
@@ -282,72 +282,120 @@
.set_page_dirty = f2fs_set_meta_page_dirty,
};
+static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ struct ino_entry *e;
+retry:
+ spin_lock(&sbi->ino_lock[type]);
+
+ e = radix_tree_lookup(&sbi->ino_root[type], ino);
+ if (!e) {
+ e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
+ if (!e) {
+ spin_unlock(&sbi->ino_lock[type]);
+ goto retry;
+ }
+ if (radix_tree_insert(&sbi->ino_root[type], ino, e)) {
+ spin_unlock(&sbi->ino_lock[type]);
+ kmem_cache_free(ino_entry_slab, e);
+ goto retry;
+ }
+ memset(e, 0, sizeof(struct ino_entry));
+ e->ino = ino;
+
+ list_add_tail(&e->list, &sbi->ino_list[type]);
+ }
+ spin_unlock(&sbi->ino_lock[type]);
+}
+
+static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ struct ino_entry *e;
+
+ spin_lock(&sbi->ino_lock[type]);
+ e = radix_tree_lookup(&sbi->ino_root[type], ino);
+ if (e) {
+ list_del(&e->list);
+ radix_tree_delete(&sbi->ino_root[type], ino);
+ if (type == ORPHAN_INO)
+ sbi->n_orphans--;
+ spin_unlock(&sbi->ino_lock[type]);
+ kmem_cache_free(ino_entry_slab, e);
+ return;
+ }
+ spin_unlock(&sbi->ino_lock[type]);
+}
+
+void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ /* add new dirty ino entry into list */
+ __add_ino_entry(sbi, ino, type);
+}
+
+void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ /* remove dirty ino entry from list */
+ __remove_ino_entry(sbi, ino, type);
+}
+
+/* mode should be APPEND_INO or UPDATE_INO */
+bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
+{
+ struct ino_entry *e;
+ spin_lock(&sbi->ino_lock[mode]);
+ e = radix_tree_lookup(&sbi->ino_root[mode], ino);
+ spin_unlock(&sbi->ino_lock[mode]);
+ return e ? true : false;
+}
+
+static void release_dirty_inode(struct f2fs_sb_info *sbi)
+{
+ struct ino_entry *e, *tmp;
+ int i;
+
+ for (i = APPEND_INO; i <= UPDATE_INO; i++) {
+ spin_lock(&sbi->ino_lock[i]);
+ list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) {
+ list_del(&e->list);
+ radix_tree_delete(&sbi->ino_root[i], e->ino);
+ kmem_cache_free(ino_entry_slab, e);
+ }
+ spin_unlock(&sbi->ino_lock[i]);
+ }
+}
+
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
int err = 0;
- spin_lock(&sbi->orphan_inode_lock);
+ spin_lock(&sbi->ino_lock[ORPHAN_INO]);
if (unlikely(sbi->n_orphans >= sbi->max_orphans))
err = -ENOSPC;
else
sbi->n_orphans++;
- spin_unlock(&sbi->orphan_inode_lock);
+ spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
return err;
}
void release_orphan_inode(struct f2fs_sb_info *sbi)
{
- spin_lock(&sbi->orphan_inode_lock);
+ spin_lock(&sbi->ino_lock[ORPHAN_INO]);
f2fs_bug_on(sbi->n_orphans == 0);
sbi->n_orphans--;
- spin_unlock(&sbi->orphan_inode_lock);
+ spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
}
void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
- struct list_head *head;
- struct orphan_inode_entry *new, *orphan;
-
- new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
- new->ino = ino;
-
- spin_lock(&sbi->orphan_inode_lock);
- head = &sbi->orphan_inode_list;
- list_for_each_entry(orphan, head, list) {
- if (orphan->ino == ino) {
- spin_unlock(&sbi->orphan_inode_lock);
- kmem_cache_free(orphan_entry_slab, new);
- return;
- }
-
- if (orphan->ino > ino)
- break;
- }
-
- /* add new orphan entry into list which is sorted by inode number */
- list_add_tail(&new->list, &orphan->list);
- spin_unlock(&sbi->orphan_inode_lock);
+ /* add new orphan ino entry into list */
+ __add_ino_entry(sbi, ino, ORPHAN_INO);
}
void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
- struct list_head *head;
- struct orphan_inode_entry *orphan;
-
- spin_lock(&sbi->orphan_inode_lock);
- head = &sbi->orphan_inode_list;
- list_for_each_entry(orphan, head, list) {
- if (orphan->ino == ino) {
- list_del(&orphan->list);
- f2fs_bug_on(sbi->n_orphans == 0);
- sbi->n_orphans--;
- spin_unlock(&sbi->orphan_inode_lock);
- kmem_cache_free(orphan_entry_slab, orphan);
- return;
- }
- }
- spin_unlock(&sbi->orphan_inode_lock);
+ /* remove orphan entry from orphan list */
+ __remove_ino_entry(sbi, ino, ORPHAN_INO);
}
static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -401,14 +449,14 @@
unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
(F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
struct page *page = NULL;
- struct orphan_inode_entry *orphan = NULL;
+ struct ino_entry *orphan = NULL;
for (index = 0; index < orphan_blocks; index++)
grab_meta_page(sbi, start_blk + index);
index = 1;
- spin_lock(&sbi->orphan_inode_lock);
- head = &sbi->orphan_inode_list;
+ spin_lock(&sbi->ino_lock[ORPHAN_INO]);
+ head = &sbi->ino_list[ORPHAN_INO];
/* loop for each orphan inode entry and write them in Jornal block */
list_for_each_entry(orphan, head, list) {
@@ -448,7 +496,7 @@
f2fs_put_page(page, 1);
}
- spin_unlock(&sbi->orphan_inode_lock);
+ spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
}
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -714,10 +762,10 @@
* until finishing nat/sit flush.
*/
retry_flush_nodes:
- mutex_lock(&sbi->node_write);
+ down_write(&sbi->node_write);
if (get_pages(sbi, F2FS_DIRTY_NODES)) {
- mutex_unlock(&sbi->node_write);
+ up_write(&sbi->node_write);
sync_node_pages(sbi, 0, &wbc);
goto retry_flush_nodes;
}
@@ -726,7 +774,7 @@
static void unblock_operations(struct f2fs_sb_info *sbi)
{
- mutex_unlock(&sbi->node_write);
+ up_write(&sbi->node_write);
f2fs_unlock_all(sbi);
}
@@ -748,6 +796,7 @@
static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
nid_t last_nid = 0;
block_t start_blk;
struct page *cp_page;
@@ -761,7 +810,7 @@
* This avoids to conduct wrong roll-forward operations and uses
* metapages, so should be called prior to sync_meta_pages below.
*/
- discard_next_dnode(sbi);
+ discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META))
@@ -885,8 +934,9 @@
/* Here, we only have one bio having CP pack */
sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
- if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
+ if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
clear_prefree_segments(sbi);
+ release_dirty_inode(sbi);
F2FS_RESET_SB_DIRT(sbi);
}
}
@@ -932,31 +982,37 @@
trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
}
-void init_orphan_info(struct f2fs_sb_info *sbi)
+void init_ino_entry_info(struct f2fs_sb_info *sbi)
{
- spin_lock_init(&sbi->orphan_inode_lock);
- INIT_LIST_HEAD(&sbi->orphan_inode_list);
- sbi->n_orphans = 0;
+ int i;
+
+ for (i = 0; i < MAX_INO_ENTRY; i++) {
+ INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
+ spin_lock_init(&sbi->ino_lock[i]);
+ INIT_LIST_HEAD(&sbi->ino_list[i]);
+ }
+
/*
* considering 512 blocks in a segment 8 blocks are needed for cp
* and log segment summaries. Remaining blocks are used to keep
* orphan entries with the limitation one reserved segment
* for cp pack we can have max 1020*504 orphan entries
*/
+ sbi->n_orphans = 0;
sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
* F2FS_ORPHANS_PER_BLOCK;
}
int __init create_checkpoint_caches(void)
{
- orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
- sizeof(struct orphan_inode_entry));
- if (!orphan_entry_slab)
+ ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
+ sizeof(struct ino_entry));
+ if (!ino_entry_slab)
return -ENOMEM;
inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
sizeof(struct dir_inode_entry));
if (!inode_entry_slab) {
- kmem_cache_destroy(orphan_entry_slab);
+ kmem_cache_destroy(ino_entry_slab);
return -ENOMEM;
}
return 0;
@@ -964,6 +1020,6 @@
void destroy_checkpoint_caches(void)
{
- kmem_cache_destroy(orphan_entry_slab);
+ kmem_cache_destroy(ino_entry_slab);
kmem_cache_destroy(inode_entry_slab);
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index feb6ca2..65453d2 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -140,7 +140,10 @@
/* change META to META_FLUSH in the checkpoint procedure */
if (type >= META_FLUSH) {
io->fio.type = META_FLUSH;
- io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
+ if (test_opt(sbi, NOBARRIER))
+ io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
+ else
+ io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
}
__submit_merged_bio(io);
up_write(&io->io_rwsem);
@@ -627,8 +630,10 @@
if (check_extent_cache(inode, pgofs, bh_result))
goto out;
- if (create)
+ if (create) {
+ f2fs_balance_fs(sbi);
f2fs_lock_op(sbi);
+ }
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -785,9 +790,11 @@
!is_cold_data(page) &&
need_inplace_update(inode))) {
rewrite_data_page(page, old_blkaddr, fio);
+ set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
} else {
write_data_page(page, &dn, &new_blkaddr, fio);
update_extent_cache(new_blkaddr, &dn);
+ set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
}
out_writepage:
f2fs_put_dnode(&dn);
@@ -1083,10 +1090,15 @@
/* clear fsync mark to recover these blocks */
fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
+ trace_f2fs_direct_IO_enter(inode, offset, count, rw);
+
err = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
get_data_block);
if (err < 0 && (rw & WRITE))
f2fs_write_failed(mapping, offset + count);
+
+ trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
+
return err;
}
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 3f99266..a441ba3 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -167,7 +167,7 @@
si->cache_mem += npages << PAGE_CACHE_SHIFT;
npages = META_MAPPING(sbi)->nrpages;
si->cache_mem += npages << PAGE_CACHE_SHIFT;
- si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry);
+ si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry);
si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c791143..a456212 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -42,6 +42,7 @@
#define F2FS_MOUNT_INLINE_XATTR 0x00000080
#define F2FS_MOUNT_INLINE_DATA 0x00000100
#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
+#define F2FS_MOUNT_NOBARRIER 0x00000400
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -100,8 +101,15 @@
META_SSA
};
-/* for the list of orphan inodes */
-struct orphan_inode_entry {
+/* for the list of ino */
+enum {
+ ORPHAN_INO, /* for orphan ino list */
+ APPEND_INO, /* for append ino list */
+ UPDATE_INO, /* for update ino list */
+ MAX_INO_ENTRY, /* max. list */
+};
+
+struct ino_entry {
struct list_head list; /* list head */
nid_t ino; /* inode number */
};
@@ -445,14 +453,17 @@
struct inode *meta_inode; /* cache meta blocks */
struct mutex cp_mutex; /* checkpoint procedure lock */
struct rw_semaphore cp_rwsem; /* blocking FS operations */
- struct mutex node_write; /* locking node writes */
+ struct rw_semaphore node_write; /* locking node writes */
struct mutex writepages; /* mutex for writepages() */
bool por_doing; /* recovery is doing or not */
wait_queue_head_t cp_wait;
- /* for orphan inode management */
- struct list_head orphan_inode_list; /* orphan inode list */
- spinlock_t orphan_inode_lock; /* for orphan inode list */
+ /* for inode management */
+ struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */
+ spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */
+ struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */
+
+ /* for orphan inode, use 0'th array */
unsigned int n_orphans; /* # of orphan inodes */
unsigned int max_orphans; /* max orphan inodes */
@@ -771,7 +782,7 @@
if (flag == NAT_BITMAP)
return &ckpt->sit_nat_version_bitmap;
else
- return ((unsigned char *)ckpt + F2FS_BLKSIZE);
+ return (unsigned char *)ckpt + F2FS_BLKSIZE;
} else {
offset = (flag == NAT_BITMAP) ?
le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
@@ -986,11 +997,15 @@
FI_NO_EXTENT, /* not to use the extent cache */
FI_INLINE_XATTR, /* used for inline xattr */
FI_INLINE_DATA, /* used for inline data*/
+ FI_APPEND_WRITE, /* inode has appended data */
+ FI_UPDATE_WRITE, /* inode has in-place-update data */
+ FI_NEED_IPU, /* force in-place update (IPU) for fdatasync */
};
static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
{
- set_bit(flag, &fi->flags);
+ if (!test_bit(flag, &fi->flags))
+ set_bit(flag, &fi->flags);
}
static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
@@ -1000,7 +1015,8 @@
static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag)
{
- clear_bit(flag, &fi->flags);
+ if (test_bit(flag, &fi->flags))
+ clear_bit(flag, &fi->flags);
}
static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode)
@@ -1194,6 +1210,7 @@
void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
void recover_node_page(struct f2fs_sb_info *, struct page *,
struct f2fs_summary *, struct node_info *, block_t);
+void recover_inline_xattr(struct inode *, struct page *);
bool recover_xattr_data(struct inode *, struct page *, block_t);
int recover_inode_page(struct f2fs_sb_info *, struct page *);
int restore_node_summary(struct f2fs_sb_info *, unsigned int,
@@ -1215,7 +1232,7 @@
void invalidate_blocks(struct f2fs_sb_info *, block_t);
void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
void clear_prefree_segments(struct f2fs_sb_info *);
-void discard_next_dnode(struct f2fs_sb_info *);
+void discard_next_dnode(struct f2fs_sb_info *, block_t);
int npages_for_summary_flush(struct f2fs_sb_info *);
void allocate_new_segments(struct f2fs_sb_info *);
struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
@@ -1249,6 +1266,9 @@
struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
+void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
+void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
+bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
int acquire_orphan_inode(struct f2fs_sb_info *);
void release_orphan_inode(struct f2fs_sb_info *);
void add_orphan_inode(struct f2fs_sb_info *, nid_t);
@@ -1260,7 +1280,7 @@
void remove_dirty_dir_inode(struct inode *);
void sync_dirty_dir_inodes(struct f2fs_sb_info *);
void write_checkpoint(struct f2fs_sb_info *, bool);
-void init_orphan_info(struct f2fs_sb_info *);
+void init_ino_entry_info(struct f2fs_sb_info *);
int __init create_checkpoint_caches(void);
void destroy_checkpoint_caches(void);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 0e88a75..938591a 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -124,12 +124,30 @@
return 0;
trace_f2fs_sync_file_enter(inode);
+
+ /* if fdatasync is triggered, let's do in-place-update */
+ if (datasync)
+ set_inode_flag(fi, FI_NEED_IPU);
+
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (datasync)
+ clear_inode_flag(fi, FI_NEED_IPU);
if (ret) {
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
return ret;
}
+ /*
+ * if there is no written data, don't waste time writing recovery info.
+ */
+ if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
+ !exist_written_data(sbi, inode->i_ino, APPEND_INO)) {
+ if (is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
+ exist_written_data(sbi, inode->i_ino, UPDATE_INO))
+ goto flush_out;
+ goto out;
+ }
+
/* guarantee free sections for fsync */
f2fs_balance_fs(sbi);
@@ -185,6 +203,13 @@
ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
if (ret)
goto out;
+
+ /* once recovery info is written, don't need to track this */
+ remove_dirty_inode(sbi, inode->i_ino, APPEND_INO);
+ clear_inode_flag(fi, FI_APPEND_WRITE);
+flush_out:
+ remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
+ clear_inode_flag(fi, FI_UPDATE_WRITE);
ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
}
out:
@@ -203,8 +228,9 @@
/* find first dirty page index */
pagevec_init(&pvec, 0);
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1);
- pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX;
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs,
+ PAGECACHE_TAG_DIRTY, 1);
+ pgofs = nr_pages ? pvec.pages[0]->index : LONG_MAX;
pagevec_release(&pvec);
return pgofs;
}
@@ -661,6 +687,8 @@
loff_t off_start, off_end;
int ret = 0;
+ f2fs_balance_fs(sbi);
+
ret = inode_newsize_ok(inode, (len + offset));
if (ret)
return ret;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 47c51ed..d7b4a5e 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -186,7 +186,6 @@
static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- unsigned int hint = 0;
unsigned int secno;
/*
@@ -194,11 +193,9 @@
* selected by background GC before.
* Those segments guarantee they have small valid blocks.
*/
-next:
- secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++);
- if (secno < TOTAL_SECS(sbi)) {
+ for_each_set_bit(secno, dirty_i->victim_secmap, TOTAL_SECS(sbi)) {
if (sec_usage_check(sbi, secno))
- goto next;
+ continue;
clear_bit(secno, dirty_i->victim_secmap);
return secno * sbi->segs_per_sec;
}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 1bba522..5beecce 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -172,6 +172,7 @@
stat_inc_inline_inode(inode);
}
+ set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
sync_inode_page(&dn);
f2fs_put_dnode(&dn);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 679ee78..d829f4e 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -261,13 +261,14 @@
void f2fs_evict_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ nid_t xnid = F2FS_I(inode)->i_xattr_nid;
trace_f2fs_evict_inode(inode);
truncate_inode_pages(&inode->i_data, 0);
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
inode->i_ino == F2FS_META_INO(sbi))
- goto no_delete;
+ goto out_clear;
f2fs_bug_on(get_dirty_dents(inode));
remove_dirty_dir_inode(inode);
@@ -287,6 +288,13 @@
f2fs_unlock_op(sbi);
no_delete:
- end_writeback(inode);
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
+ if (xnid)
+ invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
+ if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE))
+ add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
+ if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE))
+ add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
+out_clear:
+ end_writeback(inode);
}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index fa1e0c9..5ed4557 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1231,12 +1231,12 @@
if (wbc->for_reclaim)
goto redirty_out;
- mutex_lock(&sbi->node_write);
+ down_read(&sbi->node_write);
set_page_writeback(page);
write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
- mutex_unlock(&sbi->node_write);
+ up_read(&sbi->node_write);
unlock_page(page);
return 0;
@@ -1548,7 +1548,7 @@
clear_node_page_dirty(page);
}
-static void recover_inline_xattr(struct inode *inode, struct page *page)
+void recover_inline_xattr(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
void *src_addr, *dst_addr;
@@ -1587,8 +1587,6 @@
nid_t new_xnid = nid_of_node(page);
struct node_info ni;
- recover_inline_xattr(inode, page);
-
if (!f2fs_has_xattr_block(ofs_of_node(page)))
return false;
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index a112368..88e9cd9 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -300,6 +300,8 @@
struct node_info ni;
int err = 0, recovered = 0;
+ recover_inline_xattr(inode, page);
+
if (recover_inline_data(inode, page))
goto out;
@@ -434,7 +436,9 @@
int recover_fsync_data(struct f2fs_sb_info *sbi)
{
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
struct list_head inode_list;
+ block_t blkaddr;
int err;
bool need_writecp = false;
@@ -447,6 +451,9 @@
/* step #1: find fsynced inode numbers */
sbi->por_doing = true;
+
+ blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+
err = find_fsync_dnodes(sbi, &inode_list);
if (err)
goto out;
@@ -462,8 +469,21 @@
out:
destroy_fsync_dnodes(&inode_list);
kmem_cache_destroy(fsync_entry_slab);
+
+ if (err) {
+ truncate_inode_pages(NODE_MAPPING(sbi), 0);
+ truncate_inode_pages(META_MAPPING(sbi), 0);
+ }
+
sbi->por_doing = false;
- if (!err && need_writecp)
+ if (err) {
+ discard_next_dnode(sbi, blkaddr);
+
+ /* Flush all the NAT/SIT pages */
+ while (get_pages(sbi, F2FS_DIRTY_META))
+ sync_meta_pages(sbi, META, LONG_MAX);
+ } else if (need_writecp) {
write_checkpoint(sbi, false);
+ }
return err;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 3461bc1..825025b 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -266,6 +266,12 @@
struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
struct flush_cmd cmd;
+ trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
+ test_opt(sbi, FLUSH_MERGE));
+
+ if (test_opt(sbi, NOBARRIER))
+ return 0;
+
if (!test_opt(sbi, FLUSH_MERGE))
return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
@@ -403,11 +409,8 @@
return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
}
-void discard_next_dnode(struct f2fs_sb_info *sbi)
+void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
{
- struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
- block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
-
if (f2fs_issue_discard(sbi, blkaddr, 1)) {
struct page *page = grab_meta_page(sbi, blkaddr);
/* zero-filled page */
@@ -463,17 +466,12 @@
static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- unsigned int segno = -1;
+ unsigned int segno;
unsigned int total_segs = TOTAL_SEGS(sbi);
mutex_lock(&dirty_i->seglist_lock);
- while (1) {
- segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
- segno + 1);
- if (segno >= total_segs)
- break;
+ for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], total_segs)
__set_test_and_free(sbi, segno);
- }
mutex_unlock(&dirty_i->seglist_lock);
}
@@ -1000,14 +998,12 @@
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg;
- unsigned int old_cursegno;
curseg = CURSEG_I(sbi, type);
mutex_lock(&curseg->curseg_mutex);
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
- old_cursegno = curseg->segno;
/*
* __add_sum_entry should be resided under the curseg_mutex
@@ -1028,7 +1024,6 @@
* since SSR needs latest valid block information.
*/
refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
- locate_dirty_segment(sbi, old_cursegno);
mutex_unlock(&sit_i->sentry_lock);
@@ -1558,7 +1553,7 @@
struct page *page = NULL;
struct f2fs_sit_block *raw_sit = NULL;
unsigned int start = 0, end = 0;
- unsigned int segno = -1;
+ unsigned int segno;
bool flushed;
mutex_lock(&curseg->curseg_mutex);
@@ -1570,7 +1565,7 @@
*/
flushed = flush_sits_in_journal(sbi);
- while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) {
+ for_each_set_bit(segno, bitmap, nsegs) {
struct seg_entry *se = get_seg_entry(sbi, segno);
int sit_offset, offset;
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 7091204..55973f7 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -347,8 +347,8 @@
if (test_and_clear_bit(segno, free_i->free_segmap)) {
free_i->free_segments++;
- next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi),
- start_segno);
+ next = find_next_bit(free_i->free_segmap,
+ start_segno + sbi->segs_per_sec, start_segno);
if (next >= start_segno + sbi->segs_per_sec) {
if (test_and_clear_bit(secno, free_i->free_secmap))
free_i->free_sections++;
@@ -486,6 +486,10 @@
if (S_ISDIR(inode->i_mode))
return false;
+ /* this is only set during fdatasync */
+ if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
+ return true;
+
switch (SM_I(sbi)->ipu_policy) {
case F2FS_IPU_FORCE:
return true;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 96c1af2..ea40788 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -52,6 +52,7 @@
Opt_inline_xattr,
Opt_inline_data,
Opt_flush_merge,
+ Opt_nobarrier,
Opt_err,
};
@@ -69,6 +70,7 @@
{Opt_inline_xattr, "inline_xattr"},
{Opt_inline_data, "inline_data"},
{Opt_flush_merge, "flush_merge"},
+ {Opt_nobarrier, "nobarrier"},
{Opt_err, NULL},
};
@@ -339,6 +341,9 @@
case Opt_flush_merge:
set_opt(sbi, FLUSH_MERGE);
break;
+ case Opt_nobarrier:
+ set_opt(sbi, NOBARRIER);
+ break;
default:
f2fs_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" or missing value",
@@ -544,6 +549,8 @@
seq_puts(seq, ",inline_data");
if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
seq_puts(seq, ",flush_merge");
+ if (test_opt(sbi, NOBARRIER))
+ seq_puts(seq, ",nobarrier");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0;
@@ -946,7 +953,7 @@
mutex_init(&sbi->gc_mutex);
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
- mutex_init(&sbi->node_write);
+ init_rwsem(&sbi->node_write);
sbi->por_doing = false;
spin_lock_init(&sbi->stat_lock);
@@ -996,7 +1003,7 @@
INIT_LIST_HEAD(&sbi->dir_inode_list);
spin_lock_init(&sbi->dir_inode_lock);
- init_orphan_info(sbi);
+ init_ino_entry_info(sbi);
/* setup f2fs internal modules */
err = build_segment_manager(sbi);
@@ -1033,8 +1040,9 @@
goto free_node_inode;
}
if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
+ iput(root);
err = -EINVAL;
- goto free_root_inode;
+ goto free_node_inode;
}
sb->s_root = d_make_root(root); /* allocate root dentry */
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index a08c68d..72664b8 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -587,6 +587,69 @@
__entry->ret)
);
+TRACE_EVENT(f2fs_direct_IO_enter,
+
+ TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw),
+
+ TP_ARGS(inode, offset, len, rw),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(loff_t, pos)
+ __field(unsigned long, len)
+ __field(int, rw)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->pos = offset;
+ __entry->len = len;
+ __entry->rw = rw;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu rw = %d",
+ show_dev_ino(__entry),
+ __entry->pos,
+ __entry->len,
+ __entry->rw)
+);
+
+TRACE_EVENT(f2fs_direct_IO_exit,
+
+ TP_PROTO(struct inode *inode, loff_t offset, unsigned long len,
+ int rw, int ret),
+
+ TP_ARGS(inode, offset, len, rw, ret),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(loff_t, pos)
+ __field(unsigned long, len)
+ __field(int, rw)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->pos = offset;
+ __entry->len = len;
+ __entry->rw = rw;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu "
+ "rw = %d ret = %d",
+ show_dev_ino(__entry),
+ __entry->pos,
+ __entry->len,
+ __entry->rw,
+ __entry->ret)
+);
+
TRACE_EVENT(f2fs_reserve_new_block,
TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node),
@@ -923,6 +986,30 @@
(unsigned long long)__entry->blkstart,
(unsigned long long)__entry->blklen)
);
+
+TRACE_EVENT(f2fs_issue_flush,
+
+ TP_PROTO(struct super_block *sb, bool nobarrier, bool flush_merge),
+
+ TP_ARGS(sb, nobarrier, flush_merge),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(bool, nobarrier)
+ __field(bool, flush_merge)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->nobarrier = nobarrier;
+ __entry->flush_merge = flush_merge;
+ ),
+
+ TP_printk("dev = (%d,%d), %s %s",
+ show_dev(__entry),
+ __entry->nobarrier ? "skip (nobarrier)" : "issue",
+ __entry->flush_merge ? " with flush_merge" : "")
+);
#endif /* _TRACE_F2FS_H */
/* This part must be outside protection */